From cfba1808fdbd300b442cd7c811d4c8738948a529 Mon Sep 17 00:00:00 2001
From: Ponsuganth Ilangovan Ponkumar Ilango
 <pponkumar@geophysik.uni-muenchen.de>
Date: Mon, 15 Jul 2024 19:46:56 +0200
Subject: [PATCH] Update generate.py and regenerate operators

---
 generate/generate.py                          |    9 +-
 generate/requirements.txt                     |    2 +-
 operators.toml                                |    6 +-
 operators/curl_curl/CMakeLists.txt            |   16 +-
 .../curl_curl/N1E1ElementwiseCurlCurl.cpp     |    9 +-
 .../curl_curl/N1E1ElementwiseCurlCurl.hpp     |  122 +-
 ...pply_N1E1ElementwiseCurlCurl_macro_3D.cpp} |    6 +-
 ...lues_N1E1ElementwiseCurlCurl_macro_3D.cpp} |    6 +-
 ...pply_N1E1ElementwiseCurlCurl_macro_3D.cpp} |    6 +-
 ...lues_N1E1ElementwiseCurlCurl_macro_3D.cpp} |    6 +-
 ...trix_N1E1ElementwiseCurlCurl_macro_3D.cpp} |    6 +-
 operators/diffusion/CMakeLists.txt            |   96 +-
 .../diffusion/P1ElementwiseDiffusion.cpp      |   18 +-
 .../diffusion/P1ElementwiseDiffusion.hpp      |  199 +-
 .../diffusion/P2ElementwiseDiffusion.cpp      |   18 +-
 .../diffusion/P2ElementwiseDiffusion.hpp      |  219 +-
 .../P2ElementwiseDiffusionAnnulusMap.cpp      |    9 +-
 .../P2ElementwiseDiffusionAnnulusMap.hpp      |  140 +-
 ...lementwiseDiffusionIcosahedralShellMap.cpp |    9 +-
 ...lementwiseDiffusionIcosahedralShellMap.hpp |  213 +-
 ...apply_P1ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...apply_P1ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...alues_P1ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...alues_P1ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} |    6 +-
 ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} |    6 +-
 ...DiffusionIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...DiffusionIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...apply_P2ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...apply_P2ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...alues_P2ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...alues_P2ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...apply_P1ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...apply_P1ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...alues_P1ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...alues_P1ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...atrix_P1ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...atrix_P1ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} |    6 +-
 ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} |    6 +-
 ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} |    6 +-
 ...DiffusionIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...DiffusionIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...DiffusionIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...apply_P2ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...apply_P2ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...alues_P2ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...alues_P2ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 ...atrix_P2ElementwiseDiffusion_macro_2D.cpp} |    6 +-
 ...atrix_P2ElementwiseDiffusion_macro_3D.cpp} |    6 +-
 operators/div_k_grad/CMakeLists.txt           |   96 +-
 .../div_k_grad/P1ElementwiseDivKGrad.cpp      |   18 +-
 .../div_k_grad/P1ElementwiseDivKGrad.hpp      |  211 +-
 .../div_k_grad/P2ElementwiseDivKGrad.cpp      |   18 +-
 .../div_k_grad/P2ElementwiseDivKGrad.hpp      |  243 +-
 .../P2ElementwiseDivKGradAnnulusMap.cpp       |    9 +-
 .../P2ElementwiseDivKGradAnnulusMap.hpp       |  152 +-
 ...ElementwiseDivKGradIcosahedralShellMap.cpp |    9 +-
 ...ElementwiseDivKGradIcosahedralShellMap.hpp |  225 +-
 ..._apply_P1ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ..._apply_P1ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...Values_P1ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ...Values_P1ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} |    6 +-
 ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} |    6 +-
 ...eDivKGradIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...eDivKGradIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ..._apply_P2ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ..._apply_P2ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...Values_P2ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ...Values_P2ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ..._apply_P1ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ..._apply_P1ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...Values_P1ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ...Values_P1ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...Matrix_P1ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ...Matrix_P1ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} |    6 +-
 ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} |    6 +-
 ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} |    6 +-
 ...eDivKGradIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...eDivKGradIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...eDivKGradIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ..._apply_P2ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ..._apply_P2ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...Values_P2ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ...Values_P2ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 ...Matrix_P2ElementwiseDivKGrad_macro_2D.cpp} |    6 +-
 ...Matrix_P2ElementwiseDivKGrad_macro_3D.cpp} |    6 +-
 operators/divergence/CMakeLists.txt           |  100 +-
 ...oP1ElementwiseDivergenceAnnulusMap_0_0.cpp |    6 +-
 ...oP1ElementwiseDivergenceAnnulusMap_0_0.hpp |   93 +-
 ...oP1ElementwiseDivergenceAnnulusMap_0_1.cpp |    6 +-
 ...oP1ElementwiseDivergenceAnnulusMap_0_1.hpp |   93 +-
 ...twiseDivergenceIcosahedralShellMap_0_0.cpp |    6 +-
 ...twiseDivergenceIcosahedralShellMap_0_0.hpp |  141 +-
 ...twiseDivergenceIcosahedralShellMap_0_1.cpp |    6 +-
 ...twiseDivergenceIcosahedralShellMap_0_1.hpp |  141 +-
 ...twiseDivergenceIcosahedralShellMap_0_2.cpp |    6 +-
 ...twiseDivergenceIcosahedralShellMap_0_2.hpp |  141 +-
 .../P2ToP1ElementwiseDivergence_0_0.cpp       |   12 +-
 .../P2ToP1ElementwiseDivergence_0_0.hpp       |  145 +-
 .../P2ToP1ElementwiseDivergence_0_1.cpp       |   12 +-
 .../P2ToP1ElementwiseDivergence_0_1.hpp       |  145 +-
 .../P2ToP1ElementwiseDivergence_0_2.cpp       |    6 +-
 .../P2ToP1ElementwiseDivergence_0_2.hpp       |   85 +-
 ...wiseDivergenceAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseDivergenceAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_0_macro_2D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_0_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_1_macro_2D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_1_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_2_macro_3D.cpp} |    6 +-
 ...wiseDivergenceAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseDivergenceAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseDivergenceAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...wiseDivergenceAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...genceIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_0_macro_2D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_0_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_0_macro_2D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_0_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_1_macro_2D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_1_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_1_macro_2D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_1_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_2_macro_3D.cpp} |    6 +-
 ...oP1ElementwiseDivergence_0_2_macro_3D.cpp} |    6 +-
 operators/epsilon/CMakeLists.txt              |  320 +-
 .../P2ElementwiseEpsilonAnnulusMap_0_0.cpp    |    9 +-
 .../P2ElementwiseEpsilonAnnulusMap_0_0.hpp    |  152 +-
 .../P2ElementwiseEpsilonAnnulusMap_0_1.cpp    |    6 +-
 .../P2ElementwiseEpsilonAnnulusMap_0_1.hpp    |  105 +-
 .../P2ElementwiseEpsilonAnnulusMap_1_0.cpp    |    6 +-
 .../P2ElementwiseEpsilonAnnulusMap_1_0.hpp    |  105 +-
 .../P2ElementwiseEpsilonAnnulusMap_1_1.cpp    |    9 +-
 .../P2ElementwiseEpsilonAnnulusMap_1_1.hpp    |  152 +-
 ...mentwiseEpsilonIcosahedralShellMap_0_0.cpp |    9 +-
 ...mentwiseEpsilonIcosahedralShellMap_0_0.hpp |  225 +-
 ...mentwiseEpsilonIcosahedralShellMap_0_1.cpp |    6 +-
 ...mentwiseEpsilonIcosahedralShellMap_0_1.hpp |  153 +-
 ...mentwiseEpsilonIcosahedralShellMap_0_2.cpp |    6 +-
 ...mentwiseEpsilonIcosahedralShellMap_0_2.hpp |  153 +-
 ...mentwiseEpsilonIcosahedralShellMap_1_0.cpp |    6 +-
 ...mentwiseEpsilonIcosahedralShellMap_1_0.hpp |  153 +-
 ...mentwiseEpsilonIcosahedralShellMap_1_1.cpp |    9 +-
 ...mentwiseEpsilonIcosahedralShellMap_1_1.hpp |  225 +-
 ...mentwiseEpsilonIcosahedralShellMap_1_2.cpp |    6 +-
 ...mentwiseEpsilonIcosahedralShellMap_1_2.hpp |  153 +-
 ...mentwiseEpsilonIcosahedralShellMap_2_0.cpp |    6 +-
 ...mentwiseEpsilonIcosahedralShellMap_2_0.hpp |  153 +-
 ...mentwiseEpsilonIcosahedralShellMap_2_1.cpp |    6 +-
 ...mentwiseEpsilonIcosahedralShellMap_2_1.hpp |  153 +-
 ...mentwiseEpsilonIcosahedralShellMap_2_2.cpp |    9 +-
 ...mentwiseEpsilonIcosahedralShellMap_2_2.hpp |  225 +-
 .../epsilon/P2ElementwiseEpsilon_0_0.cpp      |   18 +-
 .../epsilon/P2ElementwiseEpsilon_0_0.hpp      |  243 +-
 .../epsilon/P2ElementwiseEpsilon_0_1.cpp      |   12 +-
 .../epsilon/P2ElementwiseEpsilon_0_1.hpp      |  169 +-
 .../epsilon/P2ElementwiseEpsilon_0_2.cpp      |    6 +-
 .../epsilon/P2ElementwiseEpsilon_0_2.hpp      |   97 +-
 .../epsilon/P2ElementwiseEpsilon_1_0.cpp      |   12 +-
 .../epsilon/P2ElementwiseEpsilon_1_0.hpp      |  169 +-
 .../epsilon/P2ElementwiseEpsilon_1_1.cpp      |   18 +-
 .../epsilon/P2ElementwiseEpsilon_1_1.hpp      |  243 +-
 .../epsilon/P2ElementwiseEpsilon_1_2.cpp      |    6 +-
 .../epsilon/P2ElementwiseEpsilon_1_2.hpp      |   97 +-
 .../epsilon/P2ElementwiseEpsilon_2_0.cpp      |    6 +-
 .../epsilon/P2ElementwiseEpsilon_2_0.hpp      |   97 +-
 .../epsilon/P2ElementwiseEpsilon_2_1.cpp      |    6 +-
 .../epsilon/P2ElementwiseEpsilon_2_1.hpp      |   97 +-
 .../epsilon/P2ElementwiseEpsilon_2_2.cpp      |    9 +-
 .../epsilon/P2ElementwiseEpsilon_2_2.hpp      |  140 +-
 ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_1_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_1_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...silonIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_0_0_macro_2D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_0_0_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_0_1_macro_2D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_0_1_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_0_2_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_1_0_macro_2D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_1_0_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_1_1_macro_2D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_1_1_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_1_2_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_2_0_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_2_1_macro_3D.cpp} |    6 +-
 ...ply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} |    6 +-
 ...ues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} |    6 +-
 ...rix_P2ElementwiseEpsilon_2_2_macro_3D.cpp} |    6 +-
 operators/full_stokes/CMakeLists.txt          |  320 +-
 .../P2ElementwiseFullStokesAnnulusMap_0_0.cpp |    9 +-
 .../P2ElementwiseFullStokesAnnulusMap_0_0.hpp |  153 +-
 .../P2ElementwiseFullStokesAnnulusMap_0_1.cpp |    6 +-
 .../P2ElementwiseFullStokesAnnulusMap_0_1.hpp |  105 +-
 .../P2ElementwiseFullStokesAnnulusMap_1_0.cpp |    6 +-
 .../P2ElementwiseFullStokesAnnulusMap_1_0.hpp |  105 +-
 .../P2ElementwiseFullStokesAnnulusMap_1_1.cpp |    9 +-
 .../P2ElementwiseFullStokesAnnulusMap_1_1.hpp |  153 +-
 ...twiseFullStokesIcosahedralShellMap_0_0.cpp |    9 +-
 ...twiseFullStokesIcosahedralShellMap_0_0.hpp |  225 +-
 ...twiseFullStokesIcosahedralShellMap_0_1.cpp |    6 +-
 ...twiseFullStokesIcosahedralShellMap_0_1.hpp |  153 +-
 ...twiseFullStokesIcosahedralShellMap_0_2.cpp |    6 +-
 ...twiseFullStokesIcosahedralShellMap_0_2.hpp |  153 +-
 ...twiseFullStokesIcosahedralShellMap_1_0.cpp |    6 +-
 ...twiseFullStokesIcosahedralShellMap_1_0.hpp |  153 +-
 ...twiseFullStokesIcosahedralShellMap_1_1.cpp |    9 +-
 ...twiseFullStokesIcosahedralShellMap_1_1.hpp |  225 +-
 ...twiseFullStokesIcosahedralShellMap_1_2.cpp |    6 +-
 ...twiseFullStokesIcosahedralShellMap_1_2.hpp |  153 +-
 ...twiseFullStokesIcosahedralShellMap_2_0.cpp |    6 +-
 ...twiseFullStokesIcosahedralShellMap_2_0.hpp |  153 +-
 ...twiseFullStokesIcosahedralShellMap_2_1.cpp |    6 +-
 ...twiseFullStokesIcosahedralShellMap_2_1.hpp |  153 +-
 ...twiseFullStokesIcosahedralShellMap_2_2.cpp |    9 +-
 ...twiseFullStokesIcosahedralShellMap_2_2.hpp |  225 +-
 .../P2ElementwiseFullStokes_0_0.cpp           |   18 +-
 .../P2ElementwiseFullStokes_0_0.hpp           |  245 +-
 .../P2ElementwiseFullStokes_0_1.cpp           |   12 +-
 .../P2ElementwiseFullStokes_0_1.hpp           |  169 +-
 .../P2ElementwiseFullStokes_0_2.cpp           |    6 +-
 .../P2ElementwiseFullStokes_0_2.hpp           |   97 +-
 .../P2ElementwiseFullStokes_1_0.cpp           |   12 +-
 .../P2ElementwiseFullStokes_1_0.hpp           |  169 +-
 .../P2ElementwiseFullStokes_1_1.cpp           |   18 +-
 .../P2ElementwiseFullStokes_1_1.hpp           |  245 +-
 .../P2ElementwiseFullStokes_1_2.cpp           |    6 +-
 .../P2ElementwiseFullStokes_1_2.hpp           |   97 +-
 .../P2ElementwiseFullStokes_2_0.cpp           |    6 +-
 .../P2ElementwiseFullStokes_2_0.hpp           |   97 +-
 .../P2ElementwiseFullStokes_2_1.cpp           |    6 +-
 .../P2ElementwiseFullStokes_2_1.hpp           |   97 +-
 .../P2ElementwiseFullStokes_2_2.cpp           |    9 +-
 .../P2ElementwiseFullStokes_2_2.hpp           |  141 +-
 ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_0_1_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_0_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_1_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_1_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_0_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_0_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_1_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_0_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_1_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} |    6 +-
 .../grad_rho_by_rho_dot_u/CMakeLists.txt      |   50 +-
 ...2VectorToP1ElementwiseGradRhoByRhoDotU.cpp |   12 +-
 ...2VectorToP1ElementwiseGradRhoByRhoDotU.hpp |  195 +-
 ...1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp |    6 +-
 ...1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp |  115 +-
 ...iseGradRhoByRhoDotUIcosahedralShellMap.cpp |    6 +-
 ...iseGradRhoByRhoDotUIcosahedralShellMap.hpp |  171 +-
 ...iseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp | 1071 +++
 ...oByRhoDotUIcosahedralShellMap_macro_3D.cpp | 7929 +++++++++++++++++
 ...P1ElementwiseGradRhoByRhoDotU_macro_2D.cpp |  874 ++
 ...P1ElementwiseGradRhoByRhoDotU_macro_3D.cpp | 6458 ++++++++++++++
 ...seGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} |  173 +-
 ...seGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} |  173 +-
 ...ByRhoDotUIcosahedralShellMap_macro_3D.cpp} | 1085 ++-
 ...ByRhoDotUIcosahedralShellMap_macro_3D.cpp} | 1085 ++-
 ...1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} |  136 +-
 ...1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} |  810 +-
 ...1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} |  136 +-
 ...1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} |  810 +-
 operators/gradient/CMakeLists.txt             |  100 +-
 ...1ToP2ElementwiseGradientAnnulusMap_0_0.cpp |    6 +-
 ...1ToP2ElementwiseGradientAnnulusMap_0_0.hpp |   93 +-
 ...1ToP2ElementwiseGradientAnnulusMap_1_0.cpp |    6 +-
 ...1ToP2ElementwiseGradientAnnulusMap_1_0.hpp |   93 +-
 ...entwiseGradientIcosahedralShellMap_0_0.cpp |    6 +-
 ...entwiseGradientIcosahedralShellMap_0_0.hpp |  141 +-
 ...entwiseGradientIcosahedralShellMap_1_0.cpp |    6 +-
 ...entwiseGradientIcosahedralShellMap_1_0.hpp |  141 +-
 ...entwiseGradientIcosahedralShellMap_2_0.cpp |    6 +-
 ...entwiseGradientIcosahedralShellMap_2_0.hpp |  141 +-
 .../P1ToP2ElementwiseGradient_0_0.cpp         |   12 +-
 .../P1ToP2ElementwiseGradient_0_0.hpp         |  145 +-
 .../P1ToP2ElementwiseGradient_1_0.cpp         |   12 +-
 .../P1ToP2ElementwiseGradient_1_0.hpp         |  145 +-
 .../P1ToP2ElementwiseGradient_2_0.cpp         |    6 +-
 .../P1ToP2ElementwiseGradient_2_0.hpp         |   85 +-
 ...ntwiseGradientAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...ntwiseGradientAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...dientIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...dientIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...dientIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_0_0_macro_2D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_0_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_1_0_macro_2D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_1_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_2_0_macro_3D.cpp} |    6 +-
 ...ntwiseGradientAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...ntwiseGradientAnnulusMap_0_0_macro_2D.cpp} |    6 +-
 ...ntwiseGradientAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...ntwiseGradientAnnulusMap_1_0_macro_2D.cpp} |    6 +-
 ...dientIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...dientIcosahedralShellMap_0_0_macro_3D.cpp} |    6 +-
 ...dientIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...dientIcosahedralShellMap_1_0_macro_3D.cpp} |    6 +-
 ...dientIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...dientIcosahedralShellMap_2_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_0_0_macro_2D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_0_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_0_0_macro_2D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_0_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_1_0_macro_2D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_1_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_1_0_macro_2D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_1_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_2_0_macro_3D.cpp} |    6 +-
 ...1ToP2ElementwiseGradient_2_0_macro_3D.cpp} |    6 +-
 operators/k_mass/CMakeLists.txt               |  168 +-
 operators/k_mass/P1ElementwiseKMass.cpp       |   18 +-
 operators/k_mass/P1ElementwiseKMass.hpp       |  211 +-
 .../k_mass/P1ElementwiseKMassAnnulusMap.cpp   |    9 +-
 .../k_mass/P1ElementwiseKMassAnnulusMap.hpp   |  136 +-
 .../P1ElementwiseKMassIcosahedralShellMap.cpp |    9 +-
 .../P1ElementwiseKMassIcosahedralShellMap.hpp |  209 +-
 operators/k_mass/P2ElementwiseKMass.cpp       |   18 +-
 operators/k_mass/P2ElementwiseKMass.hpp       |  243 +-
 .../k_mass/P2ElementwiseKMassAnnulusMap.cpp   |    9 +-
 .../k_mass/P2ElementwiseKMassAnnulusMap.hpp   |  152 +-
 .../P2ElementwiseKMassIcosahedralShellMap.cpp |    9 +-
 .../P2ElementwiseKMassIcosahedralShellMap.hpp |  225 +-
 operators/k_mass/P2ToP1ElementwiseKMass.cpp   |   12 +-
 operators/k_mass/P2ToP1ElementwiseKMass.hpp   |  161 +-
 .../P2ToP1ElementwiseKMassAnnulusMap.cpp      |    6 +-
 .../P2ToP1ElementwiseKMassAnnulusMap.hpp      |  101 +-
 ...oP1ElementwiseKMassIcosahedralShellMap.cpp |    6 +-
 ...oP1ElementwiseKMassIcosahedralShellMap.hpp |  149 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...ass_apply_P1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...ass_apply_P1ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...torValues_P1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...torValues_P1ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...ass_apply_P2ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...ass_apply_P2ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...torValues_P2ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...torValues_P2ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...apply_P2ToP1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...apply_P2ToP1ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...ass_apply_P1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...ass_apply_P1ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...torValues_P1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...torValues_P1ElementwiseKMass_macro_3D.cpp} |    6 +-
 ..._toMatrix_P1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ..._toMatrix_P1ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...ass_apply_P2ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...ass_apply_P2ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...torValues_P2ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...torValues_P2ElementwiseKMass_macro_3D.cpp} |    6 +-
 ..._toMatrix_P2ElementwiseKMass_macro_2D.cpp} |    6 +-
 ..._toMatrix_P2ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...wiseKMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...apply_P2ToP1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...apply_P2ToP1ElementwiseKMass_macro_3D.cpp} |    6 +-
 ...atrix_P2ToP1ElementwiseKMass_macro_2D.cpp} |    6 +-
 ...atrix_P2ToP1ElementwiseKMass_macro_3D.cpp} |    6 +-
 operators/mass/CMakeLists.txt                 |   96 +-
 operators/mass/P1ElementwiseMass.cpp          |   18 +-
 operators/mass/P1ElementwiseMass.hpp          |  199 +-
 operators/mass/P2ElementwiseMass.cpp          |   18 +-
 operators/mass/P2ElementwiseMass.hpp          |  219 +-
 .../mass/P2ElementwiseMassAnnulusMap.cpp      |    9 +-
 .../mass/P2ElementwiseMassAnnulusMap.hpp      |  140 +-
 .../P2ElementwiseMassIcosahedralShellMap.cpp  |    9 +-
 .../P2ElementwiseMassIcosahedralShellMap.hpp  |  213 +-
 ...Mass_apply_P1ElementwiseMass_macro_2D.cpp} |    6 +-
 ...Mass_apply_P1ElementwiseMass_macro_3D.cpp} |    6 +-
 ...atorValues_P1ElementwiseMass_macro_2D.cpp} |    6 +-
 ...atorValues_P1ElementwiseMass_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...twiseMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...twiseMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...Mass_apply_P2ElementwiseMass_macro_2D.cpp} |    6 +-
 ...Mass_apply_P2ElementwiseMass_macro_3D.cpp} |    6 +-
 ...atorValues_P2ElementwiseMass_macro_2D.cpp} |    6 +-
 ...atorValues_P2ElementwiseMass_macro_3D.cpp} |    6 +-
 ...Mass_apply_P1ElementwiseMass_macro_2D.cpp} |    6 +-
 ...Mass_apply_P1ElementwiseMass_macro_3D.cpp} |    6 +-
 ...atorValues_P1ElementwiseMass_macro_2D.cpp} |    6 +-
 ...atorValues_P1ElementwiseMass_macro_3D.cpp} |    6 +-
 ...s_toMatrix_P1ElementwiseMass_macro_2D.cpp} |    6 +-
 ...s_toMatrix_P1ElementwiseMass_macro_3D.cpp} |    6 +-
 ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} |    6 +-
 ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} |    6 +-
 ...twiseMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...twiseMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...twiseMassIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...Mass_apply_P2ElementwiseMass_macro_2D.cpp} |    6 +-
 ...Mass_apply_P2ElementwiseMass_macro_3D.cpp} |    6 +-
 ...atorValues_P2ElementwiseMass_macro_2D.cpp} |    6 +-
 ...atorValues_P2ElementwiseMass_macro_3D.cpp} |    6 +-
 ...s_toMatrix_P2ElementwiseMass_macro_2D.cpp} |    6 +-
 ...s_toMatrix_P2ElementwiseMass_macro_3D.cpp} |    6 +-
 operators/shear_heating/CMakeLists.txt        |   64 +-
 .../P2ElementwiseShearHeating.cpp             |   18 +-
 .../P2ElementwiseShearHeating.hpp             |  303 +-
 .../P2ElementwiseShearHeatingAnnulusMap.cpp   |    9 +-
 .../P2ElementwiseShearHeatingAnnulusMap.hpp   |  177 +-
 ...entwiseShearHeatingIcosahedralShellMap.cpp |    9 +-
 ...entwiseShearHeatingIcosahedralShellMap.hpp |  261 +-
 ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} |    6 +-
 ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} |    6 +-
 ...arHeatingIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...arHeatingIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...ly_P2ElementwiseShearHeating_macro_2D.cpp} |    6 +-
 ...ly_P2ElementwiseShearHeating_macro_3D.cpp} |    6 +-
 ...es_P2ElementwiseShearHeating_macro_2D.cpp} |    6 +-
 ...es_P2ElementwiseShearHeating_macro_3D.cpp} |    6 +-
 ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} |    6 +-
 ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} |    6 +-
 ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} |    6 +-
 ...arHeatingIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...arHeatingIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...arHeatingIcosahedralShellMap_macro_3D.cpp} |    6 +-
 ...ly_P2ElementwiseShearHeating_macro_2D.cpp} |    6 +-
 ...ly_P2ElementwiseShearHeating_macro_3D.cpp} |    6 +-
 ...es_P2ElementwiseShearHeating_macro_2D.cpp} |    6 +-
 ...es_P2ElementwiseShearHeating_macro_3D.cpp} |    6 +-
 ...ix_P2ElementwiseShearHeating_macro_2D.cpp} |    6 +-
 ...ix_P2ElementwiseShearHeating_macro_3D.cpp} |    6 +-
 625 files changed, 29931 insertions(+), 9806 deletions(-)
 rename operators/curl_curl/avx/{N1E1ElementwiseCurlCurl_apply_macro_3D.cpp => N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%)
 rename operators/curl_curl/avx/{N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp => N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%)
 rename operators/curl_curl/noarch/{N1E1ElementwiseCurlCurl_apply_macro_3D.cpp => N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%)
 rename operators/curl_curl/noarch/{N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp => N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%)
 rename operators/curl_curl/noarch/{N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp => N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%)
 rename operators/diffusion/avx/{P1ElementwiseDiffusion_apply_macro_2D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp} (99%)
 rename operators/diffusion/avx/{P1ElementwiseDiffusion_apply_macro_3D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/avx/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp} (99%)
 rename operators/diffusion/avx/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusion_apply_macro_2D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusion_apply_macro_3D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp} (99%)
 rename operators/diffusion/avx/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P1ElementwiseDiffusion_apply_macro_2D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp} (98%)
 rename operators/diffusion/noarch/{P1ElementwiseDiffusion_apply_macro_3D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp} (98%)
 rename operators/diffusion/noarch/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P1ElementwiseDiffusion_toMatrix_macro_2D.cpp => P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp} (98%)
 rename operators/diffusion/noarch/{P1ElementwiseDiffusion_toMatrix_macro_3D.cpp => P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusion_apply_macro_2D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusion_apply_macro_3D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp} (98%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusion_toMatrix_macro_2D.cpp => P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp} (99%)
 rename operators/diffusion/noarch/{P2ElementwiseDiffusion_toMatrix_macro_3D.cpp => P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp} (99%)
 rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_apply_macro_2D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp} (99%)
 rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_apply_macro_3D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp} (99%)
 rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_apply_macro_2D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_apply_macro_3D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp} (99%)
 rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_apply_macro_2D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp} (97%)
 rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_apply_macro_3D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp} (97%)
 rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp => P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp} (97%)
 rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp => P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (98%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (98%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (98%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_apply_macro_2D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp} (98%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_apply_macro_3D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp} (98%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp => P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp} (98%)
 rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp => P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp} (99%)
 rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp} (98%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp} (99%)
 rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (98%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (98%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (98%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%)
 rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (98%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%)
 rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%)
 create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
 create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
 create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
 create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} (78%)
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} (80%)
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp} (82%)
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp} (84%)
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} (74%)
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} (78%)
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} (77%)
 rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} (81%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp} (99%)
 rename operators/gradient/avx/{P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp} (98%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp} (99%)
 rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMass_apply_macro_2D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMass_apply_macro_3D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMass_apply_macro_2D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMass_apply_macro_3D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/avx/{P2ToP1ElementwiseKMass_apply_macro_2D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp} (99%)
 rename operators/k_mass/avx/{P2ToP1ElementwiseKMass_apply_macro_3D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P1ElementwiseKMass_apply_macro_2D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp} (97%)
 rename operators/k_mass/noarch/{P1ElementwiseKMass_apply_macro_3D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp} (97%)
 rename operators/k_mass/noarch/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P1ElementwiseKMass_toMatrix_macro_2D.cpp => P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp} (97%)
 rename operators/k_mass/noarch/{P1ElementwiseKMass_toMatrix_macro_3D.cpp => P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ElementwiseKMass_apply_macro_2D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ElementwiseKMass_apply_macro_3D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ElementwiseKMass_toMatrix_macro_2D.cpp => P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ElementwiseKMass_toMatrix_macro_3D.cpp => P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp => P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_apply_macro_2D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_apply_macro_3D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp => P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp} (98%)
 rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp => P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp} (99%)
 rename operators/mass/avx/{P1ElementwiseMass_apply_macro_2D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp} (99%)
 rename operators/mass/avx/{P1ElementwiseMass_apply_macro_3D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/avx/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp} (98%)
 rename operators/mass/avx/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMass_apply_macro_2D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMass_apply_macro_3D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp} (99%)
 rename operators/mass/avx/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P1ElementwiseMass_apply_macro_2D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp} (97%)
 rename operators/mass/noarch/{P1ElementwiseMass_apply_macro_3D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp} (97%)
 rename operators/mass/noarch/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P1ElementwiseMass_toMatrix_macro_2D.cpp => P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp} (97%)
 rename operators/mass/noarch/{P1ElementwiseMass_toMatrix_macro_3D.cpp => P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/mass/noarch/{P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/mass/noarch/{P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (98%)
 rename operators/mass/noarch/{P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P2ElementwiseMass_apply_macro_2D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp} (98%)
 rename operators/mass/noarch/{P2ElementwiseMass_apply_macro_3D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp} (97%)
 rename operators/mass/noarch/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/mass/noarch/{P2ElementwiseMass_toMatrix_macro_2D.cpp => P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp} (98%)
 rename operators/mass/noarch/{P2ElementwiseMass_toMatrix_macro_3D.cpp => P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeating_apply_macro_2D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeating_apply_macro_3D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp} (99%)
 rename operators/shear_heating/avx/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp} (99%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (98%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (98%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (98%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_apply_macro_2D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp} (98%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_apply_macro_3D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp} (99%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp} (97%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp} (99%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_toMatrix_macro_2D.cpp => P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp} (98%)
 rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_toMatrix_macro_3D.cpp => P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp} (99%)

diff --git a/generate/generate.py b/generate/generate.py
index e55020df..edb7e720 100644
--- a/generate/generate.py
+++ b/generate/generate.py
@@ -382,7 +382,6 @@ def generate_operator(
         operator = operator_generation.operators.HyTeGElementwiseOperator(
             name,
             symbolizer,
-            opts=optimizations,
             kernel_wrapper_types=kernel_types,
             type_descriptor=type_descriptor,
         )
@@ -400,20 +399,20 @@ def generate_operator(
                 blending=blending,  # type: ignore[call-arg] # kw-args are not supported by Callable
             )
 
-            operator.add_integral(
+            operator.add_volume_integral(
                 name="".join(name.split()),
-                dim=geometry.dimensions,
-                geometry=geometry,
-                integration_domain=operator_generation.operators.MacroIntegrationDomain.VOLUME,
+                volume_geometry=geometry,
                 quad=quad,
                 blending=blending,
                 form=form,
                 loop_strategy=loop_strategies[spec["loop-strategy"]],
+                optimizations=optimizations,
             )
 
         dir_path = os.path.join(args.output, form_str)
         operator.generate_class_code(
             dir_path,
+            class_files=operator_generation.operators.CppClassFiles.HEADER_IMPL_AND_VARIANTS,
             clang_format_binary=args.clang_format_binary,
         )
 
diff --git a/generate/requirements.txt b/generate/requirements.txt
index 0afd3ab8..930783ac 100644
--- a/generate/requirements.txt
+++ b/generate/requirements.txt
@@ -1,5 +1,5 @@
 --extra-index-url https://test.pypi.org/simple/
 
-hog @ git+ssh://git@i10git.cs.fau.de/hyteg/hog@26f110bc235ad20bff58416a4dba4e1730e74c4e
+hog @ git+https://i10git.cs.fau.de/hyteg/hog@516f33ba88809c2174d316883f09221ed0e7ce02
 tomli >= 1.1.0 ; python_version < "3.11"
 clang-format
diff --git a/operators.toml b/operators.toml
index 01bda864..806f89a2 100644
--- a/operators.toml
+++ b/operators.toml
@@ -367,7 +367,7 @@ dimensions    = [2, 3]
 quadrature    = 3
 blending      = "IdentityMap"
 loop-strategy = "sawtooth"
-optimizations = ["quadloops"]
+optimizations = ["moveconstants", "vectorize", "quadloops"]
 
 [[grad_rho_by_rho_dot_u]]
 trial-space   = "P2Vector"
@@ -377,7 +377,7 @@ dimensions    = [2]
 quadrature    = 3
 blending      = "AnnulusMap"
 loop-strategy = "sawtooth"
-optimizations = ["quadloops"]
+optimizations = ["moveconstants", "vectorize", "quadloops"]
 
 [[grad_rho_by_rho_dot_u]]
 trial-space   = "P2Vector"
@@ -387,4 +387,4 @@ dimensions    = [3]
 quadrature    = 3
 blending      = "IcosahedralShellMap"
 loop-strategy = "sawtooth"
-optimizations = ["quadloops"]
+optimizations = ["moveconstants", "vectorize", "quadloops"]
diff --git a/operators/curl_curl/CMakeLists.txt b/operators/curl_curl/CMakeLists.txt
index 8599561f..56728228 100644
--- a/operators/curl_curl/CMakeLists.txt
+++ b/operators/curl_curl/CMakeLists.txt
@@ -7,15 +7,15 @@ add_library( opgen-curl_curl
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-curl_curl PRIVATE
 
-      avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp
-      avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp
+      avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
+      avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
+      noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp
-      avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
+      avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -26,9 +26,9 @@ else()
 
    target_sources(opgen-curl_curl PRIVATE
 
-      noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp
-      noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp
+      noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
+      noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
+      noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp b/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp
index 84d74b53..92232dfb 100644
--- a/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp
+++ b/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp
@@ -118,7 +118,7 @@ void N1E1ElementwiseCurlCurl::apply( const n1e1::N1E1VectorFunction< real_t >& s
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_N1E1ElementwiseCurlCurl_macro_3D(
 
              _data_dst,
              _data_src,
@@ -136,6 +136,7 @@ void N1E1ElementwiseCurlCurl::apply( const n1e1::N1E1VectorFunction< real_t >& s
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -200,7 +201,7 @@ void N1E1ElementwiseCurlCurl::toMatrix( const std::shared_ptr< SparseMatrixProxy
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_N1E1ElementwiseCurlCurl_macro_3D(
 
              _data_dst,
              _data_src,
@@ -221,6 +222,7 @@ void N1E1ElementwiseCurlCurl::toMatrix( const std::shared_ptr< SparseMatrixProxy
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -278,7 +280,7 @@ void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D(
 
                 _data_invDiag_,
                 macro_vertex_coord_id_0comp0,
@@ -295,6 +297,7 @@ void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp b/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp
index b66be43c..5560334f 100644
--- a/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp
+++ b/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/n1e1functionspace/N1E1MacroCell.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -68,74 +70,88 @@ class N1E1ElementwiseCurlCurl : public Operator< n1e1::N1E1VectorFunction< real_
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: N1E1ElementwiseCurlCurl
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Keast 0 | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    180     202      37       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_N1E1ElementwiseCurlCurl_macro_3D( real_t* RESTRICT _data_dst,
+                                                real_t* RESTRICT _data_src,
+                                                real_t           macro_vertex_coord_id_0comp0,
+                                                real_t           macro_vertex_coord_id_0comp1,
+                                                real_t           macro_vertex_coord_id_0comp2,
+                                                real_t           macro_vertex_coord_id_1comp0,
+                                                real_t           macro_vertex_coord_id_1comp1,
+                                                real_t           macro_vertex_coord_id_1comp2,
+                                                real_t           macro_vertex_coord_id_2comp0,
+                                                real_t           macro_vertex_coord_id_2comp1,
+                                                real_t           macro_vertex_coord_id_2comp2,
+                                                real_t           macro_vertex_coord_id_3comp0,
+                                                real_t           macro_vertex_coord_id_3comp1,
+                                                real_t           macro_vertex_coord_id_3comp2,
+                                                int64_t          micro_edges_per_macro_edge,
+                                                real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: N1E1ElementwiseCurlCurl
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Keast 0 | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    144     253      37       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_src,
-                           const Cell&                          cell,
-                           const uint_t                         level,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_N1E1ElementwiseCurlCurl_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                                   idx_t* RESTRICT                      _data_src,
+                                                   const Cell&                          cell,
+                                                   const uint_t                         level,
+                                                   real_t                               macro_vertex_coord_id_0comp0,
+                                                   real_t                               macro_vertex_coord_id_0comp1,
+                                                   real_t                               macro_vertex_coord_id_0comp2,
+                                                   real_t                               macro_vertex_coord_id_1comp0,
+                                                   real_t                               macro_vertex_coord_id_1comp1,
+                                                   real_t                               macro_vertex_coord_id_1comp2,
+                                                   real_t                               macro_vertex_coord_id_2comp0,
+                                                   real_t                               macro_vertex_coord_id_2comp1,
+                                                   real_t                               macro_vertex_coord_id_2comp2,
+                                                   real_t                               macro_vertex_coord_id_3comp0,
+                                                   real_t                               macro_vertex_coord_id_3comp1,
+                                                   real_t                               macro_vertex_coord_id_3comp2,
+                                                   std::shared_ptr< SparseMatrixProxy > mat,
+                                                   int64_t                              micro_edges_per_macro_edge,
+                                                   real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: N1E1ElementwiseCurlCurl
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Keast 0 | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    120     115      37       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t* RESTRICT _data_invDiag_,
+                                                                               real_t           macro_vertex_coord_id_0comp0,
+                                                                               real_t           macro_vertex_coord_id_0comp1,
+                                                                               real_t           macro_vertex_coord_id_0comp2,
+                                                                               real_t           macro_vertex_coord_id_1comp0,
+                                                                               real_t           macro_vertex_coord_id_1comp1,
+                                                                               real_t           macro_vertex_coord_id_1comp2,
+                                                                               real_t           macro_vertex_coord_id_2comp0,
+                                                                               real_t           macro_vertex_coord_id_2comp1,
+                                                                               real_t           macro_vertex_coord_id_2comp2,
+                                                                               real_t           macro_vertex_coord_id_3comp0,
+                                                                               real_t           macro_vertex_coord_id_3comp1,
+                                                                               real_t           macro_vertex_coord_id_3comp2,
+                                                                               int64_t          micro_edges_per_macro_edge,
+                                                                               real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< n1e1::N1E1VectorFunction< real_t > > invDiag_;
 };
diff --git a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
similarity index 99%
rename from operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp
rename to operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
index 25560670..16022bda 100644
--- a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp
+++ b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void N1E1ElementwiseCurlCurl::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void N1E1ElementwiseCurlCurl::apply_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
similarity index 99%
rename from operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
index 4397e237..9e45450f 100644
--- a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
similarity index 99%
rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp
rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
index b2ddf686..7a290c00 100644
--- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp
+++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void N1E1ElementwiseCurlCurl::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void N1E1ElementwiseCurlCurl::apply_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
similarity index 99%
rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
index f8a8c7e2..7279d4a6 100644
--- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp
similarity index 99%
rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp
rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp
index a46f9611..cf06b5a5 100644
--- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp
+++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void N1E1ElementwiseCurlCurl::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, const Cell& cell, const uint_t level, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void N1E1ElementwiseCurlCurl::toMatrix_N1E1ElementwiseCurlCurl_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, const Cell& cell, const uint_t level, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/CMakeLists.txt b/operators/diffusion/CMakeLists.txt
index 89d33aa9..694448a6 100644
--- a/operators/diffusion/CMakeLists.txt
+++ b/operators/diffusion/CMakeLists.txt
@@ -13,40 +13,40 @@ add_library( opgen-diffusion
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-diffusion PRIVATE
 
-      avx/P1ElementwiseDiffusion_apply_macro_2D.cpp
-      avx/P1ElementwiseDiffusion_apply_macro_3D.cpp
-      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDiffusion_apply_macro_2D.cpp
-      avx/P2ElementwiseDiffusion_apply_macro_3D.cpp
-      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp
+      avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
+      avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
+      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
+      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
+      avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
+      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
+      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
+      noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp
+      noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp
+      noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P1ElementwiseDiffusion_apply_macro_2D.cpp
-      avx/P1ElementwiseDiffusion_apply_macro_3D.cpp
-      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDiffusion_apply_macro_2D.cpp
-      avx/P2ElementwiseDiffusion_apply_macro_3D.cpp
-      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
+      avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
+      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
+      avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
+      avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
+      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
+      avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -57,24 +57,24 @@ else()
 
    target_sources(opgen-diffusion PRIVATE
 
-      noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp
-      noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp
-      noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
-      noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
-      noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp
-      noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp
-      noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp
+      noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
+      noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
+      noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
+      noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
+      noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp
+      noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
+      noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
+      noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
+      noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
+      noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp
+      noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/diffusion/P1ElementwiseDiffusion.cpp b/operators/diffusion/P1ElementwiseDiffusion.cpp
index f61cda10..48131e46 100644
--- a/operators/diffusion/P1ElementwiseDiffusion.cpp
+++ b/operators/diffusion/P1ElementwiseDiffusion.cpp
@@ -126,7 +126,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ElementwiseDiffusion_macro_3D(
 
              _data_dst,
              _data_src,
@@ -144,6 +144,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -191,7 +192,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ElementwiseDiffusion_macro_2D(
 
              _data_dst,
              _data_src,
@@ -203,6 +204,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -263,7 +265,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ElementwiseDiffusion_macro_3D(
 
              _data_dst,
              _data_src,
@@ -282,6 +284,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -310,7 +313,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ElementwiseDiffusion_macro_2D(
 
              _data_dst,
              _data_src,
@@ -323,6 +326,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -371,7 +375,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D(
 
                 _data_invDiag_,
                 macro_vertex_coord_id_0comp0,
@@ -388,6 +392,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -426,7 +431,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D(
 
                 _data_invDiag_,
                 macro_vertex_coord_id_0comp0,
@@ -437,6 +442,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/diffusion/P1ElementwiseDiffusion.hpp b/operators/diffusion/P1ElementwiseDiffusion.hpp
index 3829a003..af6e18e0 100644
--- a/operators/diffusion/P1ElementwiseDiffusion.hpp
+++ b/operators/diffusion/P1ElementwiseDiffusion.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,120 +84,149 @@ class P1ElementwiseDiffusion : public Operator< P1Function< real_t >, P1Function
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ElementwiseDiffusion (generated kernel; called from P1ElementwiseDiffusion::apply() inside the "kernel" timing region)
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply (takes real_t arrays _data_dst / _data_src plus the three macro vertex coordinates)
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Centroid rule | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     49      49      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P1ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_dst,
+                                               real_t* RESTRICT _data_src,
+                                               real_t           macro_vertex_coord_id_0comp0,
+                                               real_t           macro_vertex_coord_id_0comp1,
+                                               real_t           macro_vertex_coord_id_1comp0,
+                                               real_t           macro_vertex_coord_id_1comp1,
+                                               real_t           macro_vertex_coord_id_2comp0,
+                                               real_t           macro_vertex_coord_id_2comp1,
+                                               int64_t          micro_edges_per_macro_edge,
+                                               real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDiffusion (generated kernel; called from P1ElementwiseDiffusion::apply() inside the "kernel" timing region)
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply (takes real_t arrays _data_dst / _data_src plus the four macro vertex coordinates)
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Keast 0 | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    135     123      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_dst,
+                                               real_t* RESTRICT _data_src,
+                                               real_t           macro_vertex_coord_id_0comp0,
+                                               real_t           macro_vertex_coord_id_0comp1,
+                                               real_t           macro_vertex_coord_id_0comp2,
+                                               real_t           macro_vertex_coord_id_1comp0,
+                                               real_t           macro_vertex_coord_id_1comp1,
+                                               real_t           macro_vertex_coord_id_1comp2,
+                                               real_t           macro_vertex_coord_id_2comp0,
+                                               real_t           macro_vertex_coord_id_2comp1,
+                                               real_t           macro_vertex_coord_id_2comp2,
+                                               real_t           macro_vertex_coord_id_3comp0,
+                                               real_t           macro_vertex_coord_id_3comp1,
+                                               real_t           macro_vertex_coord_id_3comp2,
+                                               int64_t          micro_edges_per_macro_edge,
+                                               real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDiffusion (generated kernel; called from P1ElementwiseDiffusion::toMatrix() inside the "kernel" timing region)
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix (unlike apply, _data_dst / _data_src are idx_t arrays and a SparseMatrixProxy `mat` is passed)
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Centroid rule | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     40      43      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P1ElementwiseDiffusion_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                                  idx_t* RESTRICT                      _data_src,
+                                                  real_t                               macro_vertex_coord_id_0comp0,
+                                                  real_t                               macro_vertex_coord_id_0comp1,
+                                                  real_t                               macro_vertex_coord_id_1comp0,
+                                                  real_t                               macro_vertex_coord_id_1comp1,
+                                                  real_t                               macro_vertex_coord_id_2comp0,
+                                                  real_t                               macro_vertex_coord_id_2comp1,
+                                                  std::shared_ptr< SparseMatrixProxy > mat,
+                                                  int64_t                              micro_edges_per_macro_edge,
+                                                  real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDiffusion (generated kernel; called from P1ElementwiseDiffusion::toMatrix() inside the "kernel" timing region)
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix (unlike apply, _data_dst / _data_src are idx_t arrays and a SparseMatrixProxy `mat` is passed)
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Keast 0 | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    119     113      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P1ElementwiseDiffusion_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                                  idx_t* RESTRICT                      _data_src,
+                                                  real_t                               macro_vertex_coord_id_0comp0,
+                                                  real_t                               macro_vertex_coord_id_0comp1,
+                                                  real_t                               macro_vertex_coord_id_0comp2,
+                                                  real_t                               macro_vertex_coord_id_1comp0,
+                                                  real_t                               macro_vertex_coord_id_1comp1,
+                                                  real_t                               macro_vertex_coord_id_1comp2,
+                                                  real_t                               macro_vertex_coord_id_2comp0,
+                                                  real_t                               macro_vertex_coord_id_2comp1,
+                                                  real_t                               macro_vertex_coord_id_2comp2,
+                                                  real_t                               macro_vertex_coord_id_3comp0,
+                                                  real_t                               macro_vertex_coord_id_3comp1,
+                                                  real_t                               macro_vertex_coord_id_3comp2,
+                                                  std::shared_ptr< SparseMatrixProxy > mat,
+                                                  int64_t                              micro_edges_per_macro_edge,
+                                                  real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDiffusion (generated kernel; called from P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() inside the "kernel" timing region)
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues (takes the single real_t array _data_invDiag_; no src/dst pair)
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Centroid rule | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     40      34      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_invDiag_,
+                                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                                              int64_t          micro_edges_per_macro_edge,
+                                                                              real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDiffusion (generated kernel; called from P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() inside the "kernel" timing region)
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues (takes the single real_t array _data_invDiag_; no src/dst pair)
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Keast 0 | points: 1, degree: 1
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    111      89      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_invDiag_,
+                                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                                              real_t           macro_vertex_coord_id_0comp2,
+                                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                                              real_t           macro_vertex_coord_id_1comp2,
+                                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                                              real_t           macro_vertex_coord_id_2comp2,
+                                                                              real_t           macro_vertex_coord_id_3comp0,
+                                                                              real_t           macro_vertex_coord_id_3comp1,
+                                                                              real_t           macro_vertex_coord_id_3comp2,
+                                                                              int64_t          micro_edges_per_macro_edge,
+                                                                              real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P1Function< real_t > > invDiag_;
 };
diff --git a/operators/diffusion/P2ElementwiseDiffusion.cpp b/operators/diffusion/P2ElementwiseDiffusion.cpp
index 67280970..1a42c563 100644
--- a/operators/diffusion/P2ElementwiseDiffusion.cpp
+++ b/operators/diffusion/P2ElementwiseDiffusion.cpp
@@ -129,7 +129,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseDiffusion_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -149,6 +149,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -216,7 +217,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseDiffusion_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -230,6 +231,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -296,7 +298,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseDiffusion_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -317,6 +319,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -347,7 +350,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseDiffusion_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -362,6 +365,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -412,7 +416,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -430,6 +434,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -472,7 +477,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -484,6 +489,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/diffusion/P2ElementwiseDiffusion.hpp b/operators/diffusion/P2ElementwiseDiffusion.hpp
index 6eca940a..919440af 100644
--- a/operators/diffusion/P2ElementwiseDiffusion.hpp
+++ b/operators/diffusion/P2ElementwiseDiffusion.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,130 +84,159 @@ class P2ElementwiseDiffusion : public Operator< P2Function< real_t >, P2Function
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseDiffusion
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    215     310      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                               real_t* RESTRICT _data_dstVertex,
+                                               real_t* RESTRICT _data_srcEdge,
+                                               real_t* RESTRICT _data_srcVertex,
+                                               real_t           macro_vertex_coord_id_0comp0,
+                                               real_t           macro_vertex_coord_id_0comp1,
+                                               real_t           macro_vertex_coord_id_1comp0,
+                                               real_t           macro_vertex_coord_id_1comp1,
+                                               real_t           macro_vertex_coord_id_2comp0,
+                                               real_t           macro_vertex_coord_id_2comp1,
+                                               int64_t          micro_edges_per_macro_edge,
+                                               real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDiffusion
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1086    1461      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                               real_t* RESTRICT _data_dstVertex,
+                                               real_t* RESTRICT _data_srcEdge,
+                                               real_t* RESTRICT _data_srcVertex,
+                                               real_t           macro_vertex_coord_id_0comp0,
+                                               real_t           macro_vertex_coord_id_0comp1,
+                                               real_t           macro_vertex_coord_id_0comp2,
+                                               real_t           macro_vertex_coord_id_1comp0,
+                                               real_t           macro_vertex_coord_id_1comp1,
+                                               real_t           macro_vertex_coord_id_1comp2,
+                                               real_t           macro_vertex_coord_id_2comp0,
+                                               real_t           macro_vertex_coord_id_2comp1,
+                                               real_t           macro_vertex_coord_id_2comp2,
+                                               real_t           macro_vertex_coord_id_3comp0,
+                                               real_t           macro_vertex_coord_id_3comp1,
+                                               real_t           macro_vertex_coord_id_3comp2,
+                                               int64_t          micro_edges_per_macro_edge,
+                                               real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDiffusion
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    179     274      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseDiffusion_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                  idx_t* RESTRICT                      _data_dstVertex,
+                                                  idx_t* RESTRICT                      _data_srcEdge,
+                                                  idx_t* RESTRICT                      _data_srcVertex,
+                                                  real_t                               macro_vertex_coord_id_0comp0,
+                                                  real_t                               macro_vertex_coord_id_0comp1,
+                                                  real_t                               macro_vertex_coord_id_1comp0,
+                                                  real_t                               macro_vertex_coord_id_1comp1,
+                                                  real_t                               macro_vertex_coord_id_2comp0,
+                                                  real_t                               macro_vertex_coord_id_2comp1,
+                                                  std::shared_ptr< SparseMatrixProxy > mat,
+                                                  int64_t                              micro_edges_per_macro_edge,
+                                                  real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDiffusion
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    986    1361      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseDiffusion_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                  idx_t* RESTRICT                      _data_dstVertex,
+                                                  idx_t* RESTRICT                      _data_srcEdge,
+                                                  idx_t* RESTRICT                      _data_srcVertex,
+                                                  real_t                               macro_vertex_coord_id_0comp0,
+                                                  real_t                               macro_vertex_coord_id_0comp1,
+                                                  real_t                               macro_vertex_coord_id_0comp2,
+                                                  real_t                               macro_vertex_coord_id_1comp0,
+                                                  real_t                               macro_vertex_coord_id_1comp1,
+                                                  real_t                               macro_vertex_coord_id_1comp2,
+                                                  real_t                               macro_vertex_coord_id_2comp0,
+                                                  real_t                               macro_vertex_coord_id_2comp1,
+                                                  real_t                               macro_vertex_coord_id_2comp2,
+                                                  real_t                               macro_vertex_coord_id_3comp0,
+                                                  real_t                               macro_vertex_coord_id_3comp1,
+                                                  real_t                               macro_vertex_coord_id_3comp2,
+                                                  std::shared_ptr< SparseMatrixProxy > mat,
+                                                  int64_t                              micro_edges_per_macro_edge,
+                                                  real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDiffusion
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    110     127      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                              real_t* RESTRICT _data_invDiag_Vertex,
+                                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                                              int64_t          micro_edges_per_macro_edge,
+                                                                              real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDiffusion
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    381     497      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                              real_t* RESTRICT _data_invDiag_Vertex,
+                                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                                              real_t           macro_vertex_coord_id_0comp2,
+                                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                                              real_t           macro_vertex_coord_id_1comp2,
+                                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                                              real_t           macro_vertex_coord_id_2comp2,
+                                                                              real_t           macro_vertex_coord_id_3comp0,
+                                                                              real_t           macro_vertex_coord_id_3comp1,
+                                                                              real_t           macro_vertex_coord_id_3comp2,
+                                                                              int64_t          micro_edges_per_macro_edge,
+                                                                              real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
 };
diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp
index 5733867e..8dd27ac9 100644
--- a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp
+++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp
@@ -144,7 +144,7 @@ void P2ElementwiseDiffusionAnnulusMap::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseDiffusionAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -166,6 +166,7 @@ void P2ElementwiseDiffusionAnnulusMap::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -245,7 +246,7 @@ void P2ElementwiseDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -268,6 +269,7 @@ void P2ElementwiseDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMa
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -332,7 +334,7 @@ void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -352,6 +354,7 @@ void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp
index 5a5fa0bf..f08fa6df 100644
--- a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp
+++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,83 +85,97 @@ class P2ElementwiseDiffusionAnnulusMap : public Operator< P2Function< real_t >,
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    385     607      17       8      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                         real_t* RESTRICT _data_dstVertex,
+                                                         real_t* RESTRICT _data_srcEdge,
+                                                         real_t* RESTRICT _data_srcVertex,
+                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                         int64_t          micro_edges_per_macro_edge,
+                                                         real_t           micro_edges_per_macro_edge_float,
+                                                         real_t           radRayVertex,
+                                                         real_t           radRefVertex,
+                                                         real_t           rayVertex_0,
+                                                         real_t           rayVertex_1,
+                                                         real_t           refVertex_0,
+                                                         real_t           refVertex_1,
+                                                         real_t           thrVertex_0,
+                                                         real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    349     571      17       8      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                            idx_t* RESTRICT                      _data_dstVertex,
+                                                            idx_t* RESTRICT                      _data_srcEdge,
+                                                            idx_t* RESTRICT                      _data_srcVertex,
+                                                            real_t                               macro_vertex_coord_id_0comp0,
+                                                            real_t                               macro_vertex_coord_id_0comp1,
+                                                            real_t                               macro_vertex_coord_id_1comp0,
+                                                            real_t                               macro_vertex_coord_id_1comp1,
+                                                            real_t                               macro_vertex_coord_id_2comp0,
+                                                            real_t                               macro_vertex_coord_id_2comp1,
+                                                            std::shared_ptr< SparseMatrixProxy > mat,
+                                                            int64_t                              micro_edges_per_macro_edge,
+                                                            real_t                               micro_edges_per_macro_edge_float,
+                                                            real_t                               radRayVertex,
+                                                            real_t                               radRefVertex,
+                                                            real_t                               rayVertex_0,
+                                                            real_t                               rayVertex_1,
+                                                            real_t                               refVertex_0,
+                                                            real_t                               refVertex_1,
+                                                            real_t                               thrVertex_0,
+                                                            real_t                               thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    250     391      17       8      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                        real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                        real_t  macro_vertex_coord_id_0comp0,
+                                                                                        real_t  macro_vertex_coord_id_0comp1,
+                                                                                        real_t  macro_vertex_coord_id_1comp0,
+                                                                                        real_t  macro_vertex_coord_id_1comp1,
+                                                                                        real_t  macro_vertex_coord_id_2comp0,
+                                                                                        real_t  macro_vertex_coord_id_2comp1,
+                                                                                        int64_t micro_edges_per_macro_edge,
+                                                                                        real_t  micro_edges_per_macro_edge_float,
+                                                                                        real_t  radRayVertex,
+                                                                                        real_t  radRefVertex,
+                                                                                        real_t  rayVertex_0,
+                                                                                        real_t  rayVertex_1,
+                                                                                        real_t  refVertex_0,
+                                                                                        real_t  refVertex_1,
+                                                                                        real_t  thrVertex_0,
+                                                                                        real_t  thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
 };
diff --git a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp
index 4f8d29bc..f639eaae 100644
--- a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp
+++ b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp
@@ -147,7 +147,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::apply( const P2Function< real_t
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -181,6 +181,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::apply( const P2Function< real_t
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -272,7 +273,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix( const std::shared_ptr<
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -307,6 +308,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix( const std::shared_ptr<
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -382,7 +384,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorVa
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -414,6 +416,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorVa
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp
index e5ef874b..70628126 100644
--- a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp
+++ b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -85,119 +87,134 @@ class P2ElementwiseDiffusionIcosahedralShellMap : public Operator< P2Function< r
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseDiffusionIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1729    2398      42       5      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                  real_t* RESTRICT _data_dstVertex,
+                                                                  real_t* RESTRICT _data_srcEdge,
+                                                                  real_t* RESTRICT _data_srcVertex,
+                                                                  real_t           forVertex_0,
+                                                                  real_t           forVertex_1,
+                                                                  real_t           forVertex_2,
+                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                  real_t           macro_vertex_coord_id_0comp2,
+                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                  real_t           macro_vertex_coord_id_1comp2,
+                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                  real_t           macro_vertex_coord_id_2comp2,
+                                                                  real_t           macro_vertex_coord_id_3comp0,
+                                                                  real_t           macro_vertex_coord_id_3comp1,
+                                                                  real_t           macro_vertex_coord_id_3comp2,
+                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                  real_t           micro_edges_per_macro_edge_float,
+                                                                  real_t           radRayVertex,
+                                                                  real_t           radRefVertex,
+                                                                  real_t           rayVertex_0,
+                                                                  real_t           rayVertex_1,
+                                                                  real_t           rayVertex_2,
+                                                                  real_t           refVertex_0,
+                                                                  real_t           refVertex_1,
+                                                                  real_t           refVertex_2,
+                                                                  real_t           thrVertex_0,
+                                                                  real_t           thrVertex_1,
+                                                                  real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseDiffusionIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1629    2298      42       5      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge,
+                                                                     idx_t* RESTRICT _data_dstVertex,
+                                                                     idx_t* RESTRICT _data_srcEdge,
+                                                                     idx_t* RESTRICT _data_srcVertex,
+                                                                     real_t          forVertex_0,
+                                                                     real_t          forVertex_1,
+                                                                     real_t          forVertex_2,
+                                                                     real_t          macro_vertex_coord_id_0comp0,
+                                                                     real_t          macro_vertex_coord_id_0comp1,
+                                                                     real_t          macro_vertex_coord_id_0comp2,
+                                                                     real_t          macro_vertex_coord_id_1comp0,
+                                                                     real_t          macro_vertex_coord_id_1comp1,
+                                                                     real_t          macro_vertex_coord_id_1comp2,
+                                                                     real_t          macro_vertex_coord_id_2comp0,
+                                                                     real_t          macro_vertex_coord_id_2comp1,
+                                                                     real_t          macro_vertex_coord_id_2comp2,
+                                                                     real_t          macro_vertex_coord_id_3comp0,
+                                                                     real_t          macro_vertex_coord_id_3comp1,
+                                                                     real_t          macro_vertex_coord_id_3comp2,
+                                                                     std::shared_ptr< SparseMatrixProxy > mat,
+                                                                     int64_t micro_edges_per_macro_edge,
+                                                                     real_t  micro_edges_per_macro_edge_float,
+                                                                     real_t  radRayVertex,
+                                                                     real_t  radRefVertex,
+                                                                     real_t  rayVertex_0,
+                                                                     real_t  rayVertex_1,
+                                                                     real_t  rayVertex_2,
+                                                                     real_t  refVertex_0,
+                                                                     real_t  refVertex_1,
+                                                                     real_t  refVertex_2,
+                                                                     real_t  thrVertex_0,
+                                                                     real_t  thrVertex_1,
+                                                                     real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseDiffusionIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1009    1398      42       5      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
 };
diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp
rename to operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
index d9b0c4db..60b87ab9 100644
--- a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp
+++ b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp
rename to operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
index f6023630..b41a0d92 100644
--- a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp
+++ b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
index 0f3d04a1..7f00b2b8 100644
--- a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
index afaf36e1..9d11044c 100644
--- a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
index 3cc11289..ac0e0104 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDiffusionAnnulusMap::apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
index 2b6fdcb8..bdcda6ad 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
index 371da2f5..79e9770d 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDiffusionIcosahedralShellMap::apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
index 334bc530..911470e4 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
index 35800f77..631a97d6 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
index 5420835a..50b747de 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
index 96c7ff88..6438fa3d 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
index 114ae31f..0b778cc7 100644
--- a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
similarity index 98%
rename from operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp
rename to operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
index ae6d97a5..e6389f85 100644
--- a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp
+++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp
rename to operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
index ff9d3ee6..90d1f0f2 100644
--- a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp
+++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
similarity index 98%
rename from operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
index 84296219..8d5dc9bb 100644
--- a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
index b6973f15..c4d8f511 100644
--- a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp
similarity index 98%
rename from operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp
rename to operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp
index a8c60c59..3260de76 100644
--- a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp
+++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::toMatrix_P1ElementwiseDiffusion_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp
rename to operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp
index 5190dd49..b4b506a9 100644
--- a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp
+++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDiffusion::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDiffusion::toMatrix_P1ElementwiseDiffusion_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
index 365494c8..7896b446 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDiffusionAnnulusMap::apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
index e1d257fc..59c2a883 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
index 82d4f6a6..982ff306 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDiffusionAnnulusMap::toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
index 4645c9f6..6860a492 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDiffusionIcosahedralShellMap::apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
index abd0fc64..d532a935 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
index 2c546d5d..89f5f78a 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
index 27134c90..b2ae9b4c 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
index 2d62014f..ea6ed321 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
similarity index 98%
rename from operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
index 5ee134f2..00858d29 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
index 9b0bd97e..195e8de9 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp
index 23e440f5..81ca2f5b 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::toMatrix_P2ElementwiseDiffusion_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp
similarity index 99%
rename from operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp
rename to operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp
index 0a27a28b..a687924a 100644
--- a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDiffusion::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDiffusion::toMatrix_P2ElementwiseDiffusion_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/div_k_grad/CMakeLists.txt b/operators/div_k_grad/CMakeLists.txt
index bd44475b..a917f326 100644
--- a/operators/div_k_grad/CMakeLists.txt
+++ b/operators/div_k_grad/CMakeLists.txt
@@ -13,40 +13,40 @@ add_library( opgen-div_k_grad
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-div_k_grad PRIVATE
 
-      avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp
-      avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp
-      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp
-      avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp
-      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp
+      avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
+      avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
+      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
+      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
+      avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
+      avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
+      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
+      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
+      noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp
+      noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp
-      avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp
-      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp
-      avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp
-      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
+      avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
+      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
+      avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
+      avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
+      avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
+      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
+      avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -57,24 +57,24 @@ else()
 
    target_sources(opgen-div_k_grad PRIVATE
 
-      noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp
-      noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp
-      noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp
-      noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp
-      noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp
-      noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp
-      noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp
+      noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
+      noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
+      noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp
+      noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
+      noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
+      noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp
+      noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/div_k_grad/P1ElementwiseDivKGrad.cpp b/operators/div_k_grad/P1ElementwiseDivKGrad.cpp
index ee98f134..a5dd1f51 100644
--- a/operators/div_k_grad/P1ElementwiseDivKGrad.cpp
+++ b/operators/div_k_grad/P1ElementwiseDivKGrad.cpp
@@ -133,7 +133,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ElementwiseDivKGrad_macro_3D(
 
              _data_dst,
              _data_k,
@@ -152,6 +152,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -200,7 +201,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ElementwiseDivKGrad_macro_2D(
 
              _data_dst,
              _data_k,
@@ -213,6 +214,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -276,7 +278,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ElementwiseDivKGrad_macro_3D(
 
              _data_dst,
              _data_k,
@@ -296,6 +298,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -325,7 +328,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ElementwiseDivKGrad_macro_2D(
 
              _data_dst,
              _data_k,
@@ -339,6 +342,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -390,7 +394,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D(
 
                 _data_invDiag_,
                 _data_k,
@@ -408,6 +412,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -447,7 +452,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D(
 
                 _data_invDiag_,
                 _data_k,
@@ -459,6 +464,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/div_k_grad/P1ElementwiseDivKGrad.hpp b/operators/div_k_grad/P1ElementwiseDivKGrad.hpp
index f1c5b366..3862da35 100644
--- a/operators/div_k_grad/P1ElementwiseDivKGrad.hpp
+++ b/operators/div_k_grad/P1ElementwiseDivKGrad.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,126 +84,155 @@ class P1ElementwiseDivKGrad : public Operator< P1Function< real_t >, P1Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ElementwiseDivKGrad
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     95     102      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_k,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P1ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_dst,
+                                              real_t* RESTRICT _data_k,
+                                              real_t* RESTRICT _data_src,
+                                              real_t           macro_vertex_coord_id_0comp0,
+                                              real_t           macro_vertex_coord_id_0comp1,
+                                              real_t           macro_vertex_coord_id_1comp0,
+                                              real_t           macro_vertex_coord_id_1comp1,
+                                              real_t           macro_vertex_coord_id_2comp0,
+                                              real_t           macro_vertex_coord_id_2comp1,
+                                              int64_t          micro_edges_per_macro_edge,
+                                              real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDivKGrad
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    277     272      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_k,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_dst,
+                                              real_t* RESTRICT _data_k,
+                                              real_t* RESTRICT _data_src,
+                                              real_t           macro_vertex_coord_id_0comp0,
+                                              real_t           macro_vertex_coord_id_0comp1,
+                                              real_t           macro_vertex_coord_id_0comp2,
+                                              real_t           macro_vertex_coord_id_1comp0,
+                                              real_t           macro_vertex_coord_id_1comp1,
+                                              real_t           macro_vertex_coord_id_1comp2,
+                                              real_t           macro_vertex_coord_id_2comp0,
+                                              real_t           macro_vertex_coord_id_2comp1,
+                                              real_t           macro_vertex_coord_id_2comp2,
+                                              real_t           macro_vertex_coord_id_3comp0,
+                                              real_t           macro_vertex_coord_id_3comp1,
+                                              real_t           macro_vertex_coord_id_3comp2,
+                                              int64_t          micro_edges_per_macro_edge,
+                                              real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDivKGrad
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     86      93      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_k,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P1ElementwiseDivKGrad_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                                 real_t* RESTRICT                     _data_k,
+                                                 idx_t* RESTRICT                      _data_src,
+                                                 real_t                               macro_vertex_coord_id_0comp0,
+                                                 real_t                               macro_vertex_coord_id_0comp1,
+                                                 real_t                               macro_vertex_coord_id_1comp0,
+                                                 real_t                               macro_vertex_coord_id_1comp1,
+                                                 real_t                               macro_vertex_coord_id_2comp0,
+                                                 real_t                               macro_vertex_coord_id_2comp1,
+                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                 int64_t                              micro_edges_per_macro_edge,
+                                                 real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDivKGrad
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    261     256      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_k,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P1ElementwiseDivKGrad_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                                 real_t* RESTRICT                     _data_k,
+                                                 idx_t* RESTRICT                      _data_src,
+                                                 real_t                               macro_vertex_coord_id_0comp0,
+                                                 real_t                               macro_vertex_coord_id_0comp1,
+                                                 real_t                               macro_vertex_coord_id_0comp2,
+                                                 real_t                               macro_vertex_coord_id_1comp0,
+                                                 real_t                               macro_vertex_coord_id_1comp1,
+                                                 real_t                               macro_vertex_coord_id_1comp2,
+                                                 real_t                               macro_vertex_coord_id_2comp0,
+                                                 real_t                               macro_vertex_coord_id_2comp1,
+                                                 real_t                               macro_vertex_coord_id_2comp2,
+                                                 real_t                               macro_vertex_coord_id_3comp0,
+                                                 real_t                               macro_vertex_coord_id_3comp1,
+                                                 real_t                               macro_vertex_coord_id_3comp2,
+                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                 int64_t                              micro_edges_per_macro_edge,
+                                                 real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDivKGrad
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     71      66      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t* RESTRICT _data_k,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_invDiag_,
+                                                                             real_t* RESTRICT _data_k,
+                                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                                             int64_t          micro_edges_per_macro_edge,
+                                                                             real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseDivKGrad
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    193     160      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t* RESTRICT _data_k,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_invDiag_,
+                                                                             real_t* RESTRICT _data_k,
+                                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                                             real_t           macro_vertex_coord_id_0comp2,
+                                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                                             real_t           macro_vertex_coord_id_1comp2,
+                                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                                             real_t           macro_vertex_coord_id_2comp2,
+                                                                             real_t           macro_vertex_coord_id_3comp0,
+                                                                             real_t           macro_vertex_coord_id_3comp1,
+                                                                             real_t           macro_vertex_coord_id_3comp2,
+                                                                             int64_t          micro_edges_per_macro_edge,
+                                                                             real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P1Function< real_t > > invDiag_;
    P1Function< real_t >                    k;
diff --git a/operators/div_k_grad/P2ElementwiseDivKGrad.cpp b/operators/div_k_grad/P2ElementwiseDivKGrad.cpp
index 76309616..9474445b 100644
--- a/operators/div_k_grad/P2ElementwiseDivKGrad.cpp
+++ b/operators/div_k_grad/P2ElementwiseDivKGrad.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseDivKGrad_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseDivKGrad_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseDivKGrad_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseDivKGrad_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy >
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -440,7 +444,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -460,6 +464,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -504,7 +509,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -518,6 +523,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/div_k_grad/P2ElementwiseDivKGrad.hpp b/operators/div_k_grad/P2ElementwiseDivKGrad.hpp
index 8029975a..4dc8becf 100644
--- a/operators/div_k_grad/P2ElementwiseDivKGrad.hpp
+++ b/operators/div_k_grad/P2ElementwiseDivKGrad.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,142 +84,171 @@ class P2ElementwiseDivKGrad : public Operator< P2Function< real_t >, P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseDivKGrad
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    290     378      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                              real_t* RESTRICT _data_dstVertex,
+                                              real_t* RESTRICT _data_kEdge,
+                                              real_t* RESTRICT _data_kVertex,
+                                              real_t* RESTRICT _data_srcEdge,
+                                              real_t* RESTRICT _data_srcVertex,
+                                              real_t           macro_vertex_coord_id_0comp0,
+                                              real_t           macro_vertex_coord_id_0comp1,
+                                              real_t           macro_vertex_coord_id_1comp0,
+                                              real_t           macro_vertex_coord_id_1comp1,
+                                              real_t           macro_vertex_coord_id_2comp0,
+                                              real_t           macro_vertex_coord_id_2comp1,
+                                              int64_t          micro_edges_per_macro_edge,
+                                              real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDivKGrad
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1273    1640      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                              real_t* RESTRICT _data_dstVertex,
+                                              real_t* RESTRICT _data_kEdge,
+                                              real_t* RESTRICT _data_kVertex,
+                                              real_t* RESTRICT _data_srcEdge,
+                                              real_t* RESTRICT _data_srcVertex,
+                                              real_t           macro_vertex_coord_id_0comp0,
+                                              real_t           macro_vertex_coord_id_0comp1,
+                                              real_t           macro_vertex_coord_id_0comp2,
+                                              real_t           macro_vertex_coord_id_1comp0,
+                                              real_t           macro_vertex_coord_id_1comp1,
+                                              real_t           macro_vertex_coord_id_1comp2,
+                                              real_t           macro_vertex_coord_id_2comp0,
+                                              real_t           macro_vertex_coord_id_2comp1,
+                                              real_t           macro_vertex_coord_id_2comp2,
+                                              real_t           macro_vertex_coord_id_3comp0,
+                                              real_t           macro_vertex_coord_id_3comp1,
+                                              real_t           macro_vertex_coord_id_3comp2,
+                                              int64_t          micro_edges_per_macro_edge,
+                                              real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDivKGrad
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    254     342      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseDivKGrad_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                 idx_t* RESTRICT                      _data_dstVertex,
+                                                 real_t* RESTRICT                     _data_kEdge,
+                                                 real_t* RESTRICT                     _data_kVertex,
+                                                 idx_t* RESTRICT                      _data_srcEdge,
+                                                 idx_t* RESTRICT                      _data_srcVertex,
+                                                 real_t                               macro_vertex_coord_id_0comp0,
+                                                 real_t                               macro_vertex_coord_id_0comp1,
+                                                 real_t                               macro_vertex_coord_id_1comp0,
+                                                 real_t                               macro_vertex_coord_id_1comp1,
+                                                 real_t                               macro_vertex_coord_id_2comp0,
+                                                 real_t                               macro_vertex_coord_id_2comp1,
+                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                 int64_t                              micro_edges_per_macro_edge,
+                                                 real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDivKGrad
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1173    1540      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseDivKGrad_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                 idx_t* RESTRICT                      _data_dstVertex,
+                                                 real_t* RESTRICT                     _data_kEdge,
+                                                 real_t* RESTRICT                     _data_kVertex,
+                                                 idx_t* RESTRICT                      _data_srcEdge,
+                                                 idx_t* RESTRICT                      _data_srcVertex,
+                                                 real_t                               macro_vertex_coord_id_0comp0,
+                                                 real_t                               macro_vertex_coord_id_0comp1,
+                                                 real_t                               macro_vertex_coord_id_0comp2,
+                                                 real_t                               macro_vertex_coord_id_1comp0,
+                                                 real_t                               macro_vertex_coord_id_1comp1,
+                                                 real_t                               macro_vertex_coord_id_1comp2,
+                                                 real_t                               macro_vertex_coord_id_2comp0,
+                                                 real_t                               macro_vertex_coord_id_2comp1,
+                                                 real_t                               macro_vertex_coord_id_2comp2,
+                                                 real_t                               macro_vertex_coord_id_3comp0,
+                                                 real_t                               macro_vertex_coord_id_3comp1,
+                                                 real_t                               macro_vertex_coord_id_3comp2,
+                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                 int64_t                              micro_edges_per_macro_edge,
+                                                 real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDivKGrad
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    170     195      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                             real_t* RESTRICT _data_invDiag_Vertex,
+                                                                             real_t* RESTRICT _data_kEdge,
+                                                                             real_t* RESTRICT _data_kVertex,
+                                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                                             int64_t          micro_edges_per_macro_edge,
+                                                                             real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseDivKGrad
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    523     676      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                             real_t* RESTRICT _data_invDiag_Vertex,
+                                                                             real_t* RESTRICT _data_kEdge,
+                                                                             real_t* RESTRICT _data_kVertex,
+                                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                                             real_t           macro_vertex_coord_id_0comp2,
+                                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                                             real_t           macro_vertex_coord_id_1comp2,
+                                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                                             real_t           macro_vertex_coord_id_2comp2,
+                                                                             real_t           macro_vertex_coord_id_3comp0,
+                                                                             real_t           macro_vertex_coord_id_3comp1,
+                                                                             real_t           macro_vertex_coord_id_3comp2,
+                                                                             int64_t          micro_edges_per_macro_edge,
+                                                                             real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    k;
diff --git a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp
index 65d43fda..6804e03d 100644
--- a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp
+++ b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseDivKGradAnnulusMap::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseDivKGradAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseDivKGradAnnulusMap::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseDivKGradAnnulusMap::toMatrix( const std::shared_ptr< SparseMat
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseDivKGradAnnulusMap::toMatrix( const std::shared_ptr< SparseMat
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -349,7 +351,7 @@ void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -371,6 +373,7 @@ void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp
index fed80068..511b0056 100644
--- a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp
+++ b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,89 +85,103 @@ class P2ElementwiseDivKGradAnnulusMap : public Operator< P2Function< real_t >, P
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseDivKGradAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    668    1044      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                        real_t* RESTRICT _data_dstVertex,
+                                                        real_t* RESTRICT _data_kEdge,
+                                                        real_t* RESTRICT _data_kVertex,
+                                                        real_t* RESTRICT _data_srcEdge,
+                                                        real_t* RESTRICT _data_srcVertex,
+                                                        real_t           macro_vertex_coord_id_0comp0,
+                                                        real_t           macro_vertex_coord_id_0comp1,
+                                                        real_t           macro_vertex_coord_id_1comp0,
+                                                        real_t           macro_vertex_coord_id_1comp1,
+                                                        real_t           macro_vertex_coord_id_2comp0,
+                                                        real_t           macro_vertex_coord_id_2comp1,
+                                                        int64_t          micro_edges_per_macro_edge,
+                                                        real_t           micro_edges_per_macro_edge_float,
+                                                        real_t           radRayVertex,
+                                                        real_t           radRefVertex,
+                                                        real_t           rayVertex_0,
+                                                        real_t           rayVertex_1,
+                                                        real_t           refVertex_0,
+                                                        real_t           refVertex_1,
+                                                        real_t           thrVertex_0,
+                                                        real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseDivKGradAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    632    1008      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                           idx_t* RESTRICT                      _data_dstVertex,
+                                                           real_t* RESTRICT                     _data_kEdge,
+                                                           real_t* RESTRICT                     _data_kVertex,
+                                                           idx_t* RESTRICT                      _data_srcEdge,
+                                                           idx_t* RESTRICT                      _data_srcVertex,
+                                                           real_t                               macro_vertex_coord_id_0comp0,
+                                                           real_t                               macro_vertex_coord_id_0comp1,
+                                                           real_t                               macro_vertex_coord_id_1comp0,
+                                                           real_t                               macro_vertex_coord_id_1comp1,
+                                                           real_t                               macro_vertex_coord_id_2comp0,
+                                                           real_t                               macro_vertex_coord_id_2comp1,
+                                                           std::shared_ptr< SparseMatrixProxy > mat,
+                                                           int64_t                              micro_edges_per_macro_edge,
+                                                           real_t                               micro_edges_per_macro_edge_float,
+                                                           real_t                               radRayVertex,
+                                                           real_t                               radRefVertex,
+                                                           real_t                               rayVertex_0,
+                                                           real_t                               rayVertex_1,
+                                                           real_t                               refVertex_0,
+                                                           real_t                               refVertex_1,
+                                                           real_t                               thrVertex_0,
+                                                           real_t                               thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseDivKGradAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    518     828      28      20      4              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                       real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                       real_t* RESTRICT _data_kEdge,
+                                                                                       real_t* RESTRICT _data_kVertex,
+                                                                                       real_t  macro_vertex_coord_id_0comp0,
+                                                                                       real_t  macro_vertex_coord_id_0comp1,
+                                                                                       real_t  macro_vertex_coord_id_1comp0,
+                                                                                       real_t  macro_vertex_coord_id_1comp1,
+                                                                                       real_t  macro_vertex_coord_id_2comp0,
+                                                                                       real_t  macro_vertex_coord_id_2comp1,
+                                                                                       int64_t micro_edges_per_macro_edge,
+                                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                                       real_t  radRayVertex,
+                                                                                       real_t  radRefVertex,
+                                                                                       real_t  rayVertex_0,
+                                                                                       real_t  rayVertex_1,
+                                                                                       real_t  refVertex_0,
+                                                                                       real_t  refVertex_1,
+                                                                                       real_t  thrVertex_0,
+                                                                                       real_t  thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    k;
diff --git a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp
index 07cd64ae..aae2d630 100644
--- a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp
+++ b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::apply( const P2Function< real_t >
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::apply( const P2Function< real_t >
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix( const std::shared_ptr<
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix( const std::shared_ptr<
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -401,7 +403,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorVal
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -435,6 +437,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorVal
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp
index ae3ca25c..81e94998 100644
--- a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp
+++ b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,125 +85,140 @@ class P2ElementwiseDivKGradIcosahedralShellMap : public Operator< P2Function< re
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseDivKGradIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2453    3892      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                 real_t* RESTRICT _data_dstVertex,
+                                                                 real_t* RESTRICT _data_kEdge,
+                                                                 real_t* RESTRICT _data_kVertex,
+                                                                 real_t* RESTRICT _data_srcEdge,
+                                                                 real_t* RESTRICT _data_srcVertex,
+                                                                 real_t           forVertex_0,
+                                                                 real_t           forVertex_1,
+                                                                 real_t           forVertex_2,
+                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                                 int64_t          micro_edges_per_macro_edge,
+                                                                 real_t           micro_edges_per_macro_edge_float,
+                                                                 real_t           radRayVertex,
+                                                                 real_t           radRefVertex,
+                                                                 real_t           rayVertex_0,
+                                                                 real_t           rayVertex_1,
+                                                                 real_t           rayVertex_2,
+                                                                 real_t           refVertex_0,
+                                                                 real_t           refVertex_1,
+                                                                 real_t           refVertex_2,
+                                                                 real_t           thrVertex_0,
+                                                                 real_t           thrVertex_1,
+                                                                 real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseDivKGradIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2353    3792      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                    idx_t* RESTRICT  _data_dstVertex,
+                                                                    real_t* RESTRICT _data_kEdge,
+                                                                    real_t* RESTRICT _data_kVertex,
+                                                                    idx_t* RESTRICT  _data_srcEdge,
+                                                                    idx_t* RESTRICT  _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                                    int64_t micro_edges_per_macro_edge,
+                                                                    real_t  micro_edges_per_macro_edge_float,
+                                                                    real_t  radRayVertex,
+                                                                    real_t  radRefVertex,
+                                                                    real_t  rayVertex_0,
+                                                                    real_t  rayVertex_1,
+                                                                    real_t  rayVertex_2,
+                                                                    real_t  refVertex_0,
+                                                                    real_t  refVertex_1,
+                                                                    real_t  refVertex_2,
+                                                                    real_t  thrVertex_0,
+                                                                    real_t  thrVertex_1,
+                                                                    real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseDivKGradIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1688    2892      66      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_kEdge,
+       real_t* RESTRICT _data_kVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    k;
diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp
rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
index e9ee5836..742b6a5d 100644
--- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp
+++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp
rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
index 62884c29..07516062 100644
--- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp
+++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
index 6a72549f..7e48f505 100644
--- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
index 32844194..a5a8fe88 100644
--- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
index 9f880a45..9b41394a 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDivKGradAnnulusMap::apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
index 7fb1e61d..c3180dc5 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
index 0a49d7ba..621b9144 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDivKGradIcosahedralShellMap::apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
index 27a84e72..9e074b2a 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
index 3d15cd08..ae8283c9 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
index e0b46bc2..c3b7e54a 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
index 0b753df5..241e57c5 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
index 3b9c2989..55e3fa2a 100644
--- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
similarity index 97%
rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp
rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
index a20e98e0..08fdf639 100644
--- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp
rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
index 248a21e3..ebf6a755 100644
--- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
similarity index 97%
rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
index 37f59b2a..b1aea623 100644
--- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
index 262ee517..76b3867c 100644
--- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp
similarity index 97%
rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp
rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp
index 93e49e99..0258b90a 100644
--- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::toMatrix_P1ElementwiseDivKGrad_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp
rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp
index 32fca0a6..b09d8354 100644
--- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseDivKGrad::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseDivKGrad::toMatrix_P1ElementwiseDivKGrad_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
index 2474e6cc..42622258 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDivKGradAnnulusMap::apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
index b4fc63cc..756d25bd 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
index eb9f7a41..68c6f015 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseDivKGradAnnulusMap::toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
index 3e6876d2..7ef705be 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDivKGradIcosahedralShellMap::apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
index 2ac58fd8..1e24196d 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
index 8dc72e26..04fced8a 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
similarity index 98%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
index 135d9eff..17c7e3af 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
index b04e3118..3b8272b2 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
similarity index 98%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
index dace6244..f2767976 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
index d4ee433c..f2ccc2b2 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp
similarity index 98%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp
index d19cb169..16cf14e5 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::toMatrix_P2ElementwiseDivKGrad_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp
similarity index 99%
rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp
rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp
index 280d8106..a9ef3882 100644
--- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp
+++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseDivKGrad::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseDivKGrad::toMatrix_P2ElementwiseDivKGrad_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/divergence/CMakeLists.txt b/operators/divergence/CMakeLists.txt
index a3729803..dec538e7 100644
--- a/operators/divergence/CMakeLists.txt
+++ b/operators/divergence/CMakeLists.txt
@@ -21,40 +21,40 @@ add_library( opgen-divergence
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-divergence PRIVATE
 
-      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
+      avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -65,26 +65,26 @@ else()
 
    target_sources(opgen-divergence PRIVATE
 
-      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp
index a3480614..4279d509 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp
@@ -133,7 +133,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply( const P2Function< real_t
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -154,6 +154,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply( const P2Function< real_t
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix( const std::shared_ptr<
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -250,6 +251,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix( const std::shared_ptr<
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp
index 8480f840..99cb2ed6 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,57 +79,66 @@ class P2ToP1ElementwiseDivergenceAnnulusMap_0_0 : public Operator< P2Function< r
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    220     318      17      12      3              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dst,
+                                                                  real_t* RESTRICT _data_srcEdge,
+                                                                  real_t* RESTRICT _data_srcVertex,
+                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                  real_t           micro_edges_per_macro_edge_float,
+                                                                  real_t           radRayVertex,
+                                                                  real_t           radRefVertex,
+                                                                  real_t           rayVertex_0,
+                                                                  real_t           rayVertex_1,
+                                                                  real_t           refVertex_0,
+                                                                  real_t           refVertex_1,
+                                                                  real_t           thrVertex_0,
+                                                                  real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    202     300      17      12      3              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dst,
+                                                                     idx_t* RESTRICT _data_srcEdge,
+                                                                     idx_t* RESTRICT _data_srcVertex,
+                                                                     real_t          macro_vertex_coord_id_0comp0,
+                                                                     real_t          macro_vertex_coord_id_0comp1,
+                                                                     real_t          macro_vertex_coord_id_1comp0,
+                                                                     real_t          macro_vertex_coord_id_1comp1,
+                                                                     real_t          macro_vertex_coord_id_2comp0,
+                                                                     real_t          macro_vertex_coord_id_2comp1,
+                                                                     std::shared_ptr< SparseMatrixProxy > mat,
+                                                                     int64_t micro_edges_per_macro_edge,
+                                                                     real_t  micro_edges_per_macro_edge_float,
+                                                                     real_t  radRayVertex,
+                                                                     real_t  radRefVertex,
+                                                                     real_t  rayVertex_0,
+                                                                     real_t  rayVertex_1,
+                                                                     real_t  refVertex_0,
+                                                                     real_t  refVertex_1,
+                                                                     real_t  thrVertex_0,
+                                                                     real_t  thrVertex_1 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp
index 682f2f25..f63530f1 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp
@@ -133,7 +133,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply( const P2Function< real_t
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -154,6 +154,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply( const P2Function< real_t
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix( const std::shared_ptr<
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -250,6 +251,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix( const std::shared_ptr<
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp
index da1d51b4..d9659d20 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,57 +79,66 @@ class P2ToP1ElementwiseDivergenceAnnulusMap_0_1 : public Operator< P2Function< r
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    220     318      17      12      3              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dst,
+                                                                  real_t* RESTRICT _data_srcEdge,
+                                                                  real_t* RESTRICT _data_srcVertex,
+                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                  real_t           micro_edges_per_macro_edge_float,
+                                                                  real_t           radRayVertex,
+                                                                  real_t           radRefVertex,
+                                                                  real_t           rayVertex_0,
+                                                                  real_t           rayVertex_1,
+                                                                  real_t           refVertex_0,
+                                                                  real_t           refVertex_1,
+                                                                  real_t           thrVertex_0,
+                                                                  real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    202     300      17      12      3              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( idx_t* RESTRICT _data_dst,
+                                                                     idx_t* RESTRICT _data_srcEdge,
+                                                                     idx_t* RESTRICT _data_srcVertex,
+                                                                     real_t          macro_vertex_coord_id_0comp0,
+                                                                     real_t          macro_vertex_coord_id_0comp1,
+                                                                     real_t          macro_vertex_coord_id_1comp0,
+                                                                     real_t          macro_vertex_coord_id_1comp1,
+                                                                     real_t          macro_vertex_coord_id_2comp0,
+                                                                     real_t          macro_vertex_coord_id_2comp1,
+                                                                     std::shared_ptr< SparseMatrixProxy > mat,
+                                                                     int64_t micro_edges_per_macro_edge,
+                                                                     real_t  micro_edges_per_macro_edge_float,
+                                                                     real_t  radRayVertex,
+                                                                     real_t  radRefVertex,
+                                                                     real_t  rayVertex_0,
+                                                                     real_t  rayVertex_1,
+                                                                     real_t  refVertex_0,
+                                                                     real_t  refVertex_1,
+                                                                     real_t  thrVertex_0,
+                                                                     real_t  thrVertex_1 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp
index 9b612ab4..6597cfad 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp
@@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply( const P2Function
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply( const P2Function
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix( const std::sh
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix( const std::sh
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp
index 271f6e6e..58026a0d 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0 : public Operator< P2Fu
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    540     755      46       4      4              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dst,
+                                                                           real_t* RESTRICT _data_srcEdge,
+                                                                           real_t* RESTRICT _data_srcVertex,
+                                                                           real_t           forVertex_0,
+                                                                           real_t           forVertex_1,
+                                                                           real_t           forVertex_2,
+                                                                           real_t           macro_vertex_coord_id_0comp0,
+                                                                           real_t           macro_vertex_coord_id_0comp1,
+                                                                           real_t           macro_vertex_coord_id_0comp2,
+                                                                           real_t           macro_vertex_coord_id_1comp0,
+                                                                           real_t           macro_vertex_coord_id_1comp1,
+                                                                           real_t           macro_vertex_coord_id_1comp2,
+                                                                           real_t           macro_vertex_coord_id_2comp0,
+                                                                           real_t           macro_vertex_coord_id_2comp1,
+                                                                           real_t           macro_vertex_coord_id_2comp2,
+                                                                           real_t           macro_vertex_coord_id_3comp0,
+                                                                           real_t           macro_vertex_coord_id_3comp1,
+                                                                           real_t           macro_vertex_coord_id_3comp2,
+                                                                           int64_t          micro_edges_per_macro_edge,
+                                                                           real_t           micro_edges_per_macro_edge_float,
+                                                                           real_t           radRayVertex,
+                                                                           real_t           radRefVertex,
+                                                                           real_t           rayVertex_0,
+                                                                           real_t           rayVertex_1,
+                                                                           real_t           rayVertex_2,
+                                                                           real_t           refVertex_0,
+                                                                           real_t           refVertex_1,
+                                                                           real_t           refVertex_2,
+                                                                           real_t           thrVertex_0,
+                                                                           real_t           thrVertex_1,
+                                                                           real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    500     715      46       4      4              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dst,
+                                                                              idx_t* RESTRICT _data_srcEdge,
+                                                                              idx_t* RESTRICT _data_srcVertex,
+                                                                              real_t          forVertex_0,
+                                                                              real_t          forVertex_1,
+                                                                              real_t          forVertex_2,
+                                                                              real_t          macro_vertex_coord_id_0comp0,
+                                                                              real_t          macro_vertex_coord_id_0comp1,
+                                                                              real_t          macro_vertex_coord_id_0comp2,
+                                                                              real_t          macro_vertex_coord_id_1comp0,
+                                                                              real_t          macro_vertex_coord_id_1comp1,
+                                                                              real_t          macro_vertex_coord_id_1comp2,
+                                                                              real_t          macro_vertex_coord_id_2comp0,
+                                                                              real_t          macro_vertex_coord_id_2comp1,
+                                                                              real_t          macro_vertex_coord_id_2comp2,
+                                                                              real_t          macro_vertex_coord_id_3comp0,
+                                                                              real_t          macro_vertex_coord_id_3comp1,
+                                                                              real_t          macro_vertex_coord_id_3comp2,
+                                                                              std::shared_ptr< SparseMatrixProxy > mat,
+                                                                              int64_t micro_edges_per_macro_edge,
+                                                                              real_t  micro_edges_per_macro_edge_float,
+                                                                              real_t  radRayVertex,
+                                                                              real_t  radRefVertex,
+                                                                              real_t  rayVertex_0,
+                                                                              real_t  rayVertex_1,
+                                                                              real_t  rayVertex_2,
+                                                                              real_t  refVertex_0,
+                                                                              real_t  refVertex_1,
+                                                                              real_t  refVertex_2,
+                                                                              real_t  thrVertex_0,
+                                                                              real_t  thrVertex_1,
+                                                                              real_t  thrVertex_2 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp
index 9759b2b7..b7a33b38 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp
@@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply( const P2Function
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply( const P2Function
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix( const std::sh
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix( const std::sh
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp
index c4c588ca..b04b0891 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1 : public Operator< P2Fu
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    540     755      46       4      4              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dst, // private const kernel, declaration only; invoked from apply() between timingTree_ start/stop of "kernel"
+                                                                           real_t* RESTRICT _data_srcEdge,
+                                                                           real_t* RESTRICT _data_srcVertex,
+                                                                           real_t           forVertex_0, // NOTE(review): forVertex_*/rayVertex_*/refVertex_*/thrVertex_*/rad* presumably parametrize the IcosahedralShellMap blending - confirm
+                                                                           real_t           forVertex_1,
+                                                                           real_t           forVertex_2,
+                                                                           real_t           macro_vertex_coord_id_0comp0, // NOTE(review): presumably macro vertex <id>, coordinate component <comp> (4 vertices x 3 components) - confirm
+                                                                           real_t           macro_vertex_coord_id_0comp1,
+                                                                           real_t           macro_vertex_coord_id_0comp2,
+                                                                           real_t           macro_vertex_coord_id_1comp0,
+                                                                           real_t           macro_vertex_coord_id_1comp1,
+                                                                           real_t           macro_vertex_coord_id_1comp2,
+                                                                           real_t           macro_vertex_coord_id_2comp0,
+                                                                           real_t           macro_vertex_coord_id_2comp1,
+                                                                           real_t           macro_vertex_coord_id_2comp2,
+                                                                           real_t           macro_vertex_coord_id_3comp0,
+                                                                           real_t           macro_vertex_coord_id_3comp1,
+                                                                           real_t           macro_vertex_coord_id_3comp2,
+                                                                           int64_t          micro_edges_per_macro_edge,
+                                                                           real_t           micro_edges_per_macro_edge_float, // NOTE(review): presumably the same count as the int64_t argument above, passed as real_t - confirm against caller
+                                                                           real_t           radRayVertex,
+                                                                           real_t           radRefVertex,
+                                                                           real_t           rayVertex_0,
+                                                                           real_t           rayVertex_1,
+                                                                           real_t           rayVertex_2,
+                                                                           real_t           refVertex_0,
+                                                                           real_t           refVertex_1,
+                                                                           real_t           refVertex_2,
+                                                                           real_t           thrVertex_0,
+                                                                           real_t           thrVertex_1,
+                                                                           real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    500     715      46       4      4              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT _data_dst, // private const kernel, declaration only; invoked from toMatrix() between timingTree_ start/stop of "kernel"; buffers are idx_t here, not real_t as in the apply kernel
+                                                                              idx_t* RESTRICT _data_srcEdge,
+                                                                              idx_t* RESTRICT _data_srcVertex,
+                                                                              real_t          forVertex_0, // NOTE(review): forVertex_*/rayVertex_*/refVertex_*/thrVertex_*/rad* presumably parametrize the IcosahedralShellMap blending - confirm
+                                                                              real_t          forVertex_1,
+                                                                              real_t          forVertex_2,
+                                                                              real_t          macro_vertex_coord_id_0comp0, // NOTE(review): presumably macro vertex <id>, coordinate component <comp> (4 vertices x 3 components) - confirm
+                                                                              real_t          macro_vertex_coord_id_0comp1,
+                                                                              real_t          macro_vertex_coord_id_0comp2,
+                                                                              real_t          macro_vertex_coord_id_1comp0,
+                                                                              real_t          macro_vertex_coord_id_1comp1,
+                                                                              real_t          macro_vertex_coord_id_1comp2,
+                                                                              real_t          macro_vertex_coord_id_2comp0,
+                                                                              real_t          macro_vertex_coord_id_2comp1,
+                                                                              real_t          macro_vertex_coord_id_2comp2,
+                                                                              real_t          macro_vertex_coord_id_3comp0,
+                                                                              real_t          macro_vertex_coord_id_3comp1,
+                                                                              real_t          macro_vertex_coord_id_3comp2,
+                                                                              std::shared_ptr< SparseMatrixProxy > mat, // shared_ptr taken by value; NOTE(review): presumably the proxy the matrix entries are written to - confirm in the kernel definition
+                                                                              int64_t micro_edges_per_macro_edge,
+                                                                              real_t  micro_edges_per_macro_edge_float, // NOTE(review): presumably the same count as the int64_t argument above, passed as real_t - confirm against caller
+                                                                              real_t  radRayVertex,
+                                                                              real_t  radRefVertex,
+                                                                              real_t  rayVertex_0,
+                                                                              real_t  rayVertex_1,
+                                                                              real_t  rayVertex_2,
+                                                                              real_t  refVertex_0,
+                                                                              real_t  refVertex_1,
+                                                                              real_t  refVertex_2,
+                                                                              real_t  thrVertex_0,
+                                                                              real_t  thrVertex_1,
+                                                                              real_t  thrVertex_2 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp
index 7099d22e..f92a94c7 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp
@@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply( const P2Function
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply( const P2Function
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix( const std::sh
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix( const std::sh
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp
index abb88eb5..d0df6fe8 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2 : public Operator< P2Fu
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    540     755      46       4      4              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dst, // private const kernel, declaration only; invoked from apply() between timingTree_ start/stop of "kernel"
+                                                                           real_t* RESTRICT _data_srcEdge,
+                                                                           real_t* RESTRICT _data_srcVertex,
+                                                                           real_t           forVertex_0, // NOTE(review): forVertex_*/rayVertex_*/refVertex_*/thrVertex_*/rad* presumably parametrize the IcosahedralShellMap blending - confirm
+                                                                           real_t           forVertex_1,
+                                                                           real_t           forVertex_2,
+                                                                           real_t           macro_vertex_coord_id_0comp0, // NOTE(review): presumably macro vertex <id>, coordinate component <comp> (4 vertices x 3 components) - confirm
+                                                                           real_t           macro_vertex_coord_id_0comp1,
+                                                                           real_t           macro_vertex_coord_id_0comp2,
+                                                                           real_t           macro_vertex_coord_id_1comp0,
+                                                                           real_t           macro_vertex_coord_id_1comp1,
+                                                                           real_t           macro_vertex_coord_id_1comp2,
+                                                                           real_t           macro_vertex_coord_id_2comp0,
+                                                                           real_t           macro_vertex_coord_id_2comp1,
+                                                                           real_t           macro_vertex_coord_id_2comp2,
+                                                                           real_t           macro_vertex_coord_id_3comp0,
+                                                                           real_t           macro_vertex_coord_id_3comp1,
+                                                                           real_t           macro_vertex_coord_id_3comp2,
+                                                                           int64_t          micro_edges_per_macro_edge,
+                                                                           real_t           micro_edges_per_macro_edge_float, // NOTE(review): presumably the same count as the int64_t argument above, passed as real_t - confirm against caller
+                                                                           real_t           radRayVertex,
+                                                                           real_t           radRefVertex,
+                                                                           real_t           rayVertex_0,
+                                                                           real_t           rayVertex_1,
+                                                                           real_t           rayVertex_2,
+                                                                           real_t           refVertex_0,
+                                                                           real_t           refVertex_1,
+                                                                           real_t           refVertex_2,
+                                                                           real_t           thrVertex_0,
+                                                                           real_t           thrVertex_1,
+                                                                           real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    500     715      46       4      4              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT _data_dst, // private const kernel, declaration only; invoked from toMatrix() between timingTree_ start/stop of "kernel"; buffers are idx_t here, not real_t as in the apply kernel
+                                                                              idx_t* RESTRICT _data_srcEdge,
+                                                                              idx_t* RESTRICT _data_srcVertex,
+                                                                              real_t          forVertex_0, // NOTE(review): forVertex_*/rayVertex_*/refVertex_*/thrVertex_*/rad* presumably parametrize the IcosahedralShellMap blending - confirm
+                                                                              real_t          forVertex_1,
+                                                                              real_t          forVertex_2,
+                                                                              real_t          macro_vertex_coord_id_0comp0, // NOTE(review): presumably macro vertex <id>, coordinate component <comp> (4 vertices x 3 components) - confirm
+                                                                              real_t          macro_vertex_coord_id_0comp1,
+                                                                              real_t          macro_vertex_coord_id_0comp2,
+                                                                              real_t          macro_vertex_coord_id_1comp0,
+                                                                              real_t          macro_vertex_coord_id_1comp1,
+                                                                              real_t          macro_vertex_coord_id_1comp2,
+                                                                              real_t          macro_vertex_coord_id_2comp0,
+                                                                              real_t          macro_vertex_coord_id_2comp1,
+                                                                              real_t          macro_vertex_coord_id_2comp2,
+                                                                              real_t          macro_vertex_coord_id_3comp0,
+                                                                              real_t          macro_vertex_coord_id_3comp1,
+                                                                              real_t          macro_vertex_coord_id_3comp2,
+                                                                              std::shared_ptr< SparseMatrixProxy > mat, // shared_ptr taken by value; NOTE(review): presumably the proxy the matrix entries are written to - confirm in the kernel definition
+                                                                              int64_t micro_edges_per_macro_edge,
+                                                                              real_t  micro_edges_per_macro_edge_float, // NOTE(review): presumably the same count as the int64_t argument above, passed as real_t - confirm against caller
+                                                                              real_t  radRayVertex,
+                                                                              real_t  radRefVertex,
+                                                                              real_t  rayVertex_0,
+                                                                              real_t  rayVertex_1,
+                                                                              real_t  rayVertex_2,
+                                                                              real_t  refVertex_0,
+                                                                              real_t  refVertex_1,
+                                                                              real_t  refVertex_2,
+                                                                              real_t  thrVertex_0,
+                                                                              real_t  thrVertex_1,
+                                                                              real_t  thrVertex_2 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp
index 4b0f4130..e03a5a1a 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp
@@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseDivergence_0_0_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -194,7 +195,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ToP1ElementwiseDivergence_0_0_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -207,6 +208,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -268,7 +270,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -288,6 +290,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -317,7 +320,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -331,6 +334,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp
index 653ee1b0..8657dc17 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -74,88 +76,107 @@ class P2ToP1ElementwiseDivergence_0_0 : public Operator< P2Function< real_t >, P
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergence_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    116     132      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t* RESTRICT _data_dst,
+                                                        real_t* RESTRICT _data_srcEdge,
+                                                        real_t* RESTRICT _data_srcVertex,
+                                                        real_t           macro_vertex_coord_id_0comp0,
+                                                        real_t           macro_vertex_coord_id_0comp1,
+                                                        real_t           macro_vertex_coord_id_1comp0,
+                                                        real_t           macro_vertex_coord_id_1comp1,
+                                                        real_t           macro_vertex_coord_id_2comp0,
+                                                        real_t           macro_vertex_coord_id_2comp1,
+                                                        int64_t          micro_edges_per_macro_edge,
+                                                        real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergence_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    345     352      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t* RESTRICT _data_dst,
+                                                        real_t* RESTRICT _data_srcEdge,
+                                                        real_t* RESTRICT _data_srcVertex,
+                                                        real_t           macro_vertex_coord_id_0comp0,
+                                                        real_t           macro_vertex_coord_id_0comp1,
+                                                        real_t           macro_vertex_coord_id_0comp2,
+                                                        real_t           macro_vertex_coord_id_1comp0,
+                                                        real_t           macro_vertex_coord_id_1comp1,
+                                                        real_t           macro_vertex_coord_id_1comp2,
+                                                        real_t           macro_vertex_coord_id_2comp0,
+                                                        real_t           macro_vertex_coord_id_2comp1,
+                                                        real_t           macro_vertex_coord_id_2comp2,
+                                                        real_t           macro_vertex_coord_id_3comp0,
+                                                        real_t           macro_vertex_coord_id_3comp1,
+                                                        real_t           macro_vertex_coord_id_3comp2,
+                                                        int64_t          micro_edges_per_macro_edge,
+                                                        real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergence_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     98     114      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                                           idx_t* RESTRICT                      _data_srcEdge,
+                                                           idx_t* RESTRICT                      _data_srcVertex,
+                                                           real_t                               macro_vertex_coord_id_0comp0,
+                                                           real_t                               macro_vertex_coord_id_0comp1,
+                                                           real_t                               macro_vertex_coord_id_1comp0,
+                                                           real_t                               macro_vertex_coord_id_1comp1,
+                                                           real_t                               macro_vertex_coord_id_2comp0,
+                                                           real_t                               macro_vertex_coord_id_2comp1,
+                                                           std::shared_ptr< SparseMatrixProxy > mat,
+                                                           int64_t                              micro_edges_per_macro_edge,
+                                                           real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergence_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    305     312      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                                           idx_t* RESTRICT                      _data_srcEdge,
+                                                           idx_t* RESTRICT                      _data_srcVertex,
+                                                           real_t                               macro_vertex_coord_id_0comp0,
+                                                           real_t                               macro_vertex_coord_id_0comp1,
+                                                           real_t                               macro_vertex_coord_id_0comp2,
+                                                           real_t                               macro_vertex_coord_id_1comp0,
+                                                           real_t                               macro_vertex_coord_id_1comp1,
+                                                           real_t                               macro_vertex_coord_id_1comp2,
+                                                           real_t                               macro_vertex_coord_id_2comp0,
+                                                           real_t                               macro_vertex_coord_id_2comp1,
+                                                           real_t                               macro_vertex_coord_id_2comp2,
+                                                           real_t                               macro_vertex_coord_id_3comp0,
+                                                           real_t                               macro_vertex_coord_id_3comp1,
+                                                           real_t                               macro_vertex_coord_id_3comp2,
+                                                           std::shared_ptr< SparseMatrixProxy > mat,
+                                                           int64_t                              micro_edges_per_macro_edge,
+                                                           real_t micro_edges_per_macro_edge_float ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp
index e0bc14a7..25f0678d 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp
@@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseDivergence_0_1_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -194,7 +195,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ToP1ElementwiseDivergence_0_1_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -207,6 +208,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -268,7 +270,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -288,6 +290,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -317,7 +320,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D(
 
              _data_dst,
              _data_srcEdge,
@@ -331,6 +334,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp
index 58716343..389d7638 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -74,88 +76,107 @@ class P2ToP1ElementwiseDivergence_0_1 : public Operator< P2Function< real_t >, P
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergence_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    116     132      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t* RESTRICT _data_dst,
+                                                        real_t* RESTRICT _data_srcEdge,
+                                                        real_t* RESTRICT _data_srcVertex,
+                                                        real_t           macro_vertex_coord_id_0comp0,
+                                                        real_t           macro_vertex_coord_id_0comp1,
+                                                        real_t           macro_vertex_coord_id_1comp0,
+                                                        real_t           macro_vertex_coord_id_1comp1,
+                                                        real_t           macro_vertex_coord_id_2comp0,
+                                                        real_t           macro_vertex_coord_id_2comp1,
+                                                        int64_t          micro_edges_per_macro_edge,
+                                                        real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergence_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    345     352      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t* RESTRICT _data_dst,
+                                                        real_t* RESTRICT _data_srcEdge,
+                                                        real_t* RESTRICT _data_srcVertex,
+                                                        real_t           macro_vertex_coord_id_0comp0,
+                                                        real_t           macro_vertex_coord_id_0comp1,
+                                                        real_t           macro_vertex_coord_id_0comp2,
+                                                        real_t           macro_vertex_coord_id_1comp0,
+                                                        real_t           macro_vertex_coord_id_1comp1,
+                                                        real_t           macro_vertex_coord_id_1comp2,
+                                                        real_t           macro_vertex_coord_id_2comp0,
+                                                        real_t           macro_vertex_coord_id_2comp1,
+                                                        real_t           macro_vertex_coord_id_2comp2,
+                                                        real_t           macro_vertex_coord_id_3comp0,
+                                                        real_t           macro_vertex_coord_id_3comp1,
+                                                        real_t           macro_vertex_coord_id_3comp2,
+                                                        int64_t          micro_edges_per_macro_edge,
+                                                        real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergence_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     98     114      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                                           idx_t* RESTRICT                      _data_srcEdge,
+                                                           idx_t* RESTRICT                      _data_srcVertex,
+                                                           real_t                               macro_vertex_coord_id_0comp0,
+                                                           real_t                               macro_vertex_coord_id_0comp1,
+                                                           real_t                               macro_vertex_coord_id_1comp0,
+                                                           real_t                               macro_vertex_coord_id_1comp1,
+                                                           real_t                               macro_vertex_coord_id_2comp0,
+                                                           real_t                               macro_vertex_coord_id_2comp1,
+                                                           std::shared_ptr< SparseMatrixProxy > mat,
+                                                           int64_t                              micro_edges_per_macro_edge,
+                                                           real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergence_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    305     312      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                                           idx_t* RESTRICT                      _data_srcEdge,
+                                                           idx_t* RESTRICT                      _data_srcVertex,
+                                                           real_t                               macro_vertex_coord_id_0comp0,
+                                                           real_t                               macro_vertex_coord_id_0comp1,
+                                                           real_t                               macro_vertex_coord_id_0comp2,
+                                                           real_t                               macro_vertex_coord_id_1comp0,
+                                                           real_t                               macro_vertex_coord_id_1comp1,
+                                                           real_t                               macro_vertex_coord_id_1comp2,
+                                                           real_t                               macro_vertex_coord_id_2comp0,
+                                                           real_t                               macro_vertex_coord_id_2comp1,
+                                                           real_t                               macro_vertex_coord_id_2comp2,
+                                                           real_t                               macro_vertex_coord_id_3comp0,
+                                                           real_t                               macro_vertex_coord_id_3comp1,
+                                                           real_t                               macro_vertex_coord_id_3comp2,
+                                                           std::shared_ptr< SparseMatrixProxy > mat,
+                                                           int64_t                              micro_edges_per_macro_edge,
+                                                           real_t micro_edges_per_macro_edge_float ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp
index 920dcae6..5a88c930 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp
@@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_2::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseDivergence_0_2_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_2::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -212,7 +213,7 @@ void P2ToP1ElementwiseDivergence_0_2::toMatrix( const std::shared_ptr< SparseMat
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D(
 
              _data_dst,
              _data_srcEdge,
@@ -232,6 +233,7 @@ void P2ToP1ElementwiseDivergence_0_2::toMatrix( const std::shared_ptr< SparseMat
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp
index ebec476b..01a9a941 100644
--- a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp
+++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -74,53 +76,62 @@ class P2ToP1ElementwiseDivergence_0_2 : public Operator< P2Function< real_t >, P
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseDivergence_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    345     352      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t* RESTRICT _data_dst,
+                                                        real_t* RESTRICT _data_srcEdge,
+                                                        real_t* RESTRICT _data_srcVertex,
+                                                        real_t           macro_vertex_coord_id_0comp0,
+                                                        real_t           macro_vertex_coord_id_0comp1,
+                                                        real_t           macro_vertex_coord_id_0comp2,
+                                                        real_t           macro_vertex_coord_id_1comp0,
+                                                        real_t           macro_vertex_coord_id_1comp1,
+                                                        real_t           macro_vertex_coord_id_1comp2,
+                                                        real_t           macro_vertex_coord_id_2comp0,
+                                                        real_t           macro_vertex_coord_id_2comp1,
+                                                        real_t           macro_vertex_coord_id_2comp2,
+                                                        real_t           macro_vertex_coord_id_3comp0,
+                                                        real_t           macro_vertex_coord_id_3comp1,
+                                                        real_t           macro_vertex_coord_id_3comp2,
+                                                        int64_t          micro_edges_per_macro_edge,
+                                                        real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseDivergence_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    305     312      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                                           idx_t* RESTRICT                      _data_srcEdge,
+                                                           idx_t* RESTRICT                      _data_srcVertex,
+                                                           real_t                               macro_vertex_coord_id_0comp0,
+                                                           real_t                               macro_vertex_coord_id_0comp1,
+                                                           real_t                               macro_vertex_coord_id_0comp2,
+                                                           real_t                               macro_vertex_coord_id_1comp0,
+                                                           real_t                               macro_vertex_coord_id_1comp1,
+                                                           real_t                               macro_vertex_coord_id_1comp2,
+                                                           real_t                               macro_vertex_coord_id_2comp0,
+                                                           real_t                               macro_vertex_coord_id_2comp1,
+                                                           real_t                               macro_vertex_coord_id_2comp2,
+                                                           real_t                               macro_vertex_coord_id_3comp0,
+                                                           real_t                               macro_vertex_coord_id_3comp1,
+                                                           real_t                               macro_vertex_coord_id_3comp2,
+                                                           std::shared_ptr< SparseMatrixProxy > mat,
+                                                           int64_t                              micro_edges_per_macro_edge,
+                                                           real_t micro_edges_per_macro_edge_float ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
index bd473d03..47c44040 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
index 2ca6bcae..157bb143 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
index 96df5b0a..4324487f 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
index ea48849b..fff3e8c5 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
index 334eca5c..60540552 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
index 9b6ca0f6..f0ee48b0 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_0::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
index 94e211f5..7b2391e3 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_0::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
index bcc2e8f2..9bed3fde 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_1::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
index 4dcd5815..219f1791 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_1::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
similarity index 99%
rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
index 9a37b8db..abbde6d1 100644
--- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_2::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_2::apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
index f0fc0ab3..e4242583 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
index 3985fe5a..107518f4 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
index 2622ca33..ccd65bb8 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
index 1304883d..72c271f7 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
index 82535c45..c6e331b6 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
index a2d064fb..20197c4c 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
index ebb2f49c..ee1c6e1c 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
index 396cc539..c54ae33f 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
index 2442c50d..0a08c166 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
index 3fba24ce..f140d92f 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
index 0be5acbd..2f4fbfbd 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_0::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
index b6434286..acc9c0d1 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_0::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
index a1694a9e..600e403c 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_0::toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
index 71006702..73af2956 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_0::toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
index 2057cab6..4f36c2a3 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_1::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
index ad720e9c..d957a1ed 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_1::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
similarity index 98%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
index a390a762..bbd6556c 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_1::toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
index 3ca38cd9..8d8310e2 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_1::toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
index 390f8c57..e11280ec 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_2::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_2::apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
similarity index 99%
rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp
rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
index 094ca101..391888d7 100644
--- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseDivergence_0_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseDivergence_0_2::toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/epsilon/CMakeLists.txt b/operators/epsilon/CMakeLists.txt
index ed34aa1e..9e6dfb23 100644
--- a/operators/epsilon/CMakeLists.txt
+++ b/operators/epsilon/CMakeLists.txt
@@ -49,108 +49,108 @@ add_library( opgen-epsilon
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-epsilon PRIVATE
 
-      avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
+      avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -161,68 +161,68 @@ else()
 
    target_sources(opgen-epsilon PRIVATE
 
-      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp
+      noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
+      noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp
index 19216374..c957ab96 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix( const std::shared_ptr< Sparse
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix( const std::shared_ptr< Sparse
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -349,7 +351,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -371,6 +373,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp
index 8dc175a8..81612f44 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -89,89 +91,103 @@ class P2ElementwiseEpsilonAnnulusMap_0_0 : public Operator< P2Function< real_t >
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    756    1132      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                           real_t* RESTRICT _data_dstVertex,
+                                                           real_t* RESTRICT _data_muEdge,
+                                                           real_t* RESTRICT _data_muVertex,
+                                                           real_t* RESTRICT _data_srcEdge,
+                                                           real_t* RESTRICT _data_srcVertex,
+                                                           real_t           macro_vertex_coord_id_0comp0,
+                                                           real_t           macro_vertex_coord_id_0comp1,
+                                                           real_t           macro_vertex_coord_id_1comp0,
+                                                           real_t           macro_vertex_coord_id_1comp1,
+                                                           real_t           macro_vertex_coord_id_2comp0,
+                                                           real_t           macro_vertex_coord_id_2comp1,
+                                                           int64_t          micro_edges_per_macro_edge,
+                                                           real_t           micro_edges_per_macro_edge_float,
+                                                           real_t           radRayVertex,
+                                                           real_t           radRefVertex,
+                                                           real_t           rayVertex_0,
+                                                           real_t           rayVertex_1,
+                                                           real_t           refVertex_0,
+                                                           real_t           refVertex_1,
+                                                           real_t           thrVertex_0,
+                                                           real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    720    1096      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                              idx_t* RESTRICT                      _data_dstVertex,
+                                                              real_t* RESTRICT                     _data_muEdge,
+                                                              real_t* RESTRICT                     _data_muVertex,
+                                                              idx_t* RESTRICT                      _data_srcEdge,
+                                                              idx_t* RESTRICT                      _data_srcVertex,
+                                                              real_t                               macro_vertex_coord_id_0comp0,
+                                                              real_t                               macro_vertex_coord_id_0comp1,
+                                                              real_t                               macro_vertex_coord_id_1comp0,
+                                                              real_t                               macro_vertex_coord_id_1comp1,
+                                                              real_t                               macro_vertex_coord_id_2comp0,
+                                                              real_t                               macro_vertex_coord_id_2comp1,
+                                                              std::shared_ptr< SparseMatrixProxy > mat,
+                                                              int64_t                              micro_edges_per_macro_edge,
+                                                              real_t micro_edges_per_macro_edge_float,
+                                                              real_t radRayVertex,
+                                                              real_t radRefVertex,
+                                                              real_t rayVertex_0,
+                                                              real_t rayVertex_1,
+                                                              real_t refVertex_0,
+                                                              real_t refVertex_1,
+                                                              real_t thrVertex_0,
+                                                              real_t thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    546     916      28      20      4              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                          real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                          real_t* RESTRICT _data_muEdge,
+                                                                                          real_t* RESTRICT _data_muVertex,
+                                                                                          real_t  macro_vertex_coord_id_0comp0,
+                                                                                          real_t  macro_vertex_coord_id_0comp1,
+                                                                                          real_t  macro_vertex_coord_id_1comp0,
+                                                                                          real_t  macro_vertex_coord_id_1comp1,
+                                                                                          real_t  macro_vertex_coord_id_2comp0,
+                                                                                          real_t  macro_vertex_coord_id_2comp1,
+                                                                                          int64_t micro_edges_per_macro_edge,
+                                                                                          real_t micro_edges_per_macro_edge_float,
+                                                                                          real_t radRayVertex,
+                                                                                          real_t radRefVertex,
+                                                                                          real_t rayVertex_0,
+                                                                                          real_t rayVertex_1,
+                                                                                          real_t refVertex_0,
+                                                                                          real_t refVertex_1,
+                                                                                          real_t thrVertex_0,
+                                                                                          real_t thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp
index 64e79f28..f95365a7 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix( const std::shared_ptr< Sparse
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix( const std::shared_ptr< Sparse
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp
index a14a2551..a702fc80 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,63 +85,72 @@ class P2ElementwiseEpsilonAnnulusMap_0_1 : public Operator< P2Function< real_t >
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    936    1192      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, // kernel run by apply() inside the "kernel" timing scope (was apply_macro_2D)
+                                                           real_t* RESTRICT _data_dstVertex, // dst/mu/src each arrive as an Edge pointer plus a Vertex pointer
+                                                           real_t* RESTRICT _data_muEdge,
+                                                           real_t* RESTRICT _data_muVertex,
+                                                           real_t* RESTRICT _data_srcEdge,
+                                                           real_t* RESTRICT _data_srcVertex,
+                                                           real_t           macro_vertex_coord_id_0comp0, // macro vertices 0..2, components 0..1 (header above: triangle, spacedim 2)
+                                                           real_t           macro_vertex_coord_id_0comp1,
+                                                           real_t           macro_vertex_coord_id_1comp0,
+                                                           real_t           macro_vertex_coord_id_1comp1,
+                                                           real_t           macro_vertex_coord_id_2comp0,
+                                                           real_t           macro_vertex_coord_id_2comp1,
+                                                           int64_t          micro_edges_per_macro_edge, // integer count; the _float argument below is presumably the same value as real_t -- confirm at call site
+                                                           real_t           micro_edges_per_macro_edge_float,
+                                                           real_t           radRayVertex, // from here on: presumably AnnulusMap blending parameters (header above: blending map AnnulusMap) -- confirm
+                                                           real_t           radRefVertex,
+                                                           real_t           rayVertex_0,
+                                                           real_t           rayVertex_1,
+                                                           real_t           refVertex_0,
+                                                           real_t           refVertex_1,
+                                                           real_t           thrVertex_0,
+                                                           real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    900    1156      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( idx_t* RESTRICT                      _data_dstEdge, // kernel run by toMatrix() inside the "kernel" timing scope (was toMatrix_macro_2D)
+                                                              idx_t* RESTRICT                      _data_dstVertex, // dst/src are idx_t* here (real_t* in the apply kernel); presumably DoF indices -- confirm
+                                                              real_t* RESTRICT                     _data_muEdge, // mu stays real_t*, as in the apply kernel
+                                                              real_t* RESTRICT                     _data_muVertex,
+                                                              idx_t* RESTRICT                      _data_srcEdge,
+                                                              idx_t* RESTRICT                      _data_srcVertex,
+                                                              real_t                               macro_vertex_coord_id_0comp0, // macro vertices 0..2, components 0..1 (header above: triangle, spacedim 2)
+                                                              real_t                               macro_vertex_coord_id_0comp1,
+                                                              real_t                               macro_vertex_coord_id_1comp0,
+                                                              real_t                               macro_vertex_coord_id_1comp1,
+                                                              real_t                               macro_vertex_coord_id_2comp0,
+                                                              real_t                               macro_vertex_coord_id_2comp1,
+                                                              std::shared_ptr< SparseMatrixProxy > mat, // sparse matrix proxy; presumably receives the assembled entries -- confirm in the kernel body
+                                                              int64_t                              micro_edges_per_macro_edge, // integer count; the _float argument below is presumably the same value as real_t -- confirm
+                                                              real_t micro_edges_per_macro_edge_float,
+                                                              real_t radRayVertex, // from here on: presumably AnnulusMap blending parameters (header above: blending map AnnulusMap) -- confirm
+                                                              real_t radRefVertex,
+                                                              real_t rayVertex_0,
+                                                              real_t rayVertex_1,
+                                                              real_t refVertex_0,
+                                                              real_t refVertex_1,
+                                                              real_t thrVertex_0,
+                                                              real_t thrVertex_1 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp
index 1dc47728..edd35d7b 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix( const std::shared_ptr< Sparse
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix( const std::shared_ptr< Sparse
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp
index eba130ce..4d137d85 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,63 +85,72 @@ class P2ElementwiseEpsilonAnnulusMap_1_0 : public Operator< P2Function< real_t >
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    936    1192      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, // kernel run by apply() inside the "kernel" timing scope (was apply_macro_2D)
+                                                           real_t* RESTRICT _data_dstVertex, // dst/mu/src each arrive as an Edge pointer plus a Vertex pointer
+                                                           real_t* RESTRICT _data_muEdge,
+                                                           real_t* RESTRICT _data_muVertex,
+                                                           real_t* RESTRICT _data_srcEdge,
+                                                           real_t* RESTRICT _data_srcVertex,
+                                                           real_t           macro_vertex_coord_id_0comp0, // macro vertices 0..2, components 0..1 (header above: triangle, spacedim 2)
+                                                           real_t           macro_vertex_coord_id_0comp1,
+                                                           real_t           macro_vertex_coord_id_1comp0,
+                                                           real_t           macro_vertex_coord_id_1comp1,
+                                                           real_t           macro_vertex_coord_id_2comp0,
+                                                           real_t           macro_vertex_coord_id_2comp1,
+                                                           int64_t          micro_edges_per_macro_edge, // integer count; the _float argument below is presumably the same value as real_t -- confirm at call site
+                                                           real_t           micro_edges_per_macro_edge_float,
+                                                           real_t           radRayVertex, // from here on: presumably AnnulusMap blending parameters (header above: blending map AnnulusMap) -- confirm
+                                                           real_t           radRefVertex,
+                                                           real_t           rayVertex_0,
+                                                           real_t           rayVertex_1,
+                                                           real_t           refVertex_0,
+                                                           real_t           refVertex_1,
+                                                           real_t           thrVertex_0,
+                                                           real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    900    1156      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge, // kernel run by toMatrix() inside the "kernel" timing scope (was toMatrix_macro_2D)
+                                                              idx_t* RESTRICT                      _data_dstVertex, // dst/src are idx_t* here (real_t* in the apply kernel); presumably DoF indices -- confirm
+                                                              real_t* RESTRICT                     _data_muEdge, // mu stays real_t*, as in the apply kernel
+                                                              real_t* RESTRICT                     _data_muVertex,
+                                                              idx_t* RESTRICT                      _data_srcEdge,
+                                                              idx_t* RESTRICT                      _data_srcVertex,
+                                                              real_t                               macro_vertex_coord_id_0comp0, // macro vertices 0..2, components 0..1 (header above: triangle, spacedim 2)
+                                                              real_t                               macro_vertex_coord_id_0comp1,
+                                                              real_t                               macro_vertex_coord_id_1comp0,
+                                                              real_t                               macro_vertex_coord_id_1comp1,
+                                                              real_t                               macro_vertex_coord_id_2comp0,
+                                                              real_t                               macro_vertex_coord_id_2comp1,
+                                                              std::shared_ptr< SparseMatrixProxy > mat, // sparse matrix proxy; presumably receives the assembled entries -- confirm in the kernel body
+                                                              int64_t                              micro_edges_per_macro_edge, // integer count; the _float argument below is presumably the same value as real_t -- confirm
+                                                              real_t micro_edges_per_macro_edge_float,
+                                                              real_t radRayVertex, // from here on: presumably AnnulusMap blending parameters (header above: blending map AnnulusMap) -- confirm
+                                                              real_t radRefVertex,
+                                                              real_t rayVertex_0,
+                                                              real_t rayVertex_1,
+                                                              real_t refVertex_0,
+                                                              real_t refVertex_1,
+                                                              real_t thrVertex_0,
+                                                              real_t thrVertex_1 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp
index e42063e8..3a421100 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix( const std::shared_ptr< Sparse
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix( const std::shared_ptr< Sparse
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -349,7 +351,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -371,6 +373,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp
index 8233c00e..8c85f610 100644
--- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -89,89 +91,103 @@ class P2ElementwiseEpsilonAnnulusMap_1_1 : public Operator< P2Function< real_t >
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    756    1132      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, // kernel run by apply() inside the "kernel" timing scope (was apply_macro_2D)
+                                                           real_t* RESTRICT _data_dstVertex, // dst/mu/src each arrive as an Edge pointer plus a Vertex pointer
+                                                           real_t* RESTRICT _data_muEdge,
+                                                           real_t* RESTRICT _data_muVertex,
+                                                           real_t* RESTRICT _data_srcEdge,
+                                                           real_t* RESTRICT _data_srcVertex,
+                                                           real_t           macro_vertex_coord_id_0comp0, // macro vertices 0..2, components 0..1 (header above: triangle, spacedim 2)
+                                                           real_t           macro_vertex_coord_id_0comp1,
+                                                           real_t           macro_vertex_coord_id_1comp0,
+                                                           real_t           macro_vertex_coord_id_1comp1,
+                                                           real_t           macro_vertex_coord_id_2comp0,
+                                                           real_t           macro_vertex_coord_id_2comp1,
+                                                           int64_t          micro_edges_per_macro_edge, // integer count; the _float argument below is presumably the same value as real_t -- confirm at call site
+                                                           real_t           micro_edges_per_macro_edge_float,
+                                                           real_t           radRayVertex, // from here on: presumably AnnulusMap blending parameters (header above: blending map AnnulusMap) -- confirm
+                                                           real_t           radRefVertex,
+                                                           real_t           rayVertex_0,
+                                                           real_t           rayVertex_1,
+                                                           real_t           refVertex_0,
+                                                           real_t           refVertex_1,
+                                                           real_t           thrVertex_0,
+                                                           real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    720    1096      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                              idx_t* RESTRICT                      _data_dstVertex,
+                                                              real_t* RESTRICT                     _data_muEdge,
+                                                              real_t* RESTRICT                     _data_muVertex,
+                                                              idx_t* RESTRICT                      _data_srcEdge,
+                                                              idx_t* RESTRICT                      _data_srcVertex,
+                                                              real_t                               macro_vertex_coord_id_0comp0,
+                                                              real_t                               macro_vertex_coord_id_0comp1,
+                                                              real_t                               macro_vertex_coord_id_1comp0,
+                                                              real_t                               macro_vertex_coord_id_1comp1,
+                                                              real_t                               macro_vertex_coord_id_2comp0,
+                                                              real_t                               macro_vertex_coord_id_2comp1,
+                                                              std::shared_ptr< SparseMatrixProxy > mat,
+                                                              int64_t                              micro_edges_per_macro_edge,
+                                                              real_t micro_edges_per_macro_edge_float,
+                                                              real_t radRayVertex,
+                                                              real_t radRefVertex,
+                                                              real_t rayVertex_0,
+                                                              real_t rayVertex_1,
+                                                              real_t refVertex_0,
+                                                              real_t refVertex_1,
+                                                              real_t thrVertex_0,
+                                                              real_t thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    546     916      28      20      4              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                          real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                          real_t* RESTRICT _data_muEdge,
+                                                                                          real_t* RESTRICT _data_muVertex,
+                                                                                          real_t  macro_vertex_coord_id_0comp0,
+                                                                                          real_t  macro_vertex_coord_id_0comp1,
+                                                                                          real_t  macro_vertex_coord_id_1comp0,
+                                                                                          real_t  macro_vertex_coord_id_1comp1,
+                                                                                          real_t  macro_vertex_coord_id_2comp0,
+                                                                                          real_t  macro_vertex_coord_id_2comp1,
+                                                                                          int64_t micro_edges_per_macro_edge,
+                                                                                          real_t micro_edges_per_macro_edge_float,
+                                                                                          real_t radRayVertex,
+                                                                                          real_t radRefVertex,
+                                                                                          real_t rayVertex_0,
+                                                                                          real_t rayVertex_1,
+                                                                                          real_t refVertex_0,
+                                                                                          real_t refVertex_1,
+                                                                                          real_t thrVertex_0,
+                                                                                          real_t thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp
index a65fdf3f..ad2f1db1 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperator
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperator
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp
index d7ff98d6..28b2a406 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_0 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3038    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2938    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2048    3397      66      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp
index 94573669..bb2a2f72 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp
index 9dae9aad..f477f5d3 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_1 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3583    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3483    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp
index bd6716ee..2bcf6f1f 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp
index 91701d41..269b0394 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_2 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3583    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3483    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp
index f4acec7b..0851bfd7 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp
index 832bb4d7..26e41a15 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_0 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3583    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3483    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp
index 39237dc7..03369a4c 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperator
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperator
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp
index a22e0b98..9311dbc3 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_1 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3038    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2938    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2048    3397      66      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp
index 4605790e..6b147ca6 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp
index 5592492c..9e94c249 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_2 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3583    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3483    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp
index e065d633..27352945 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp
index 8ae0e66c..a2020310 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_0 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3583    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3483    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp
index f5cc8cd0..92a66ed3 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp
index 324284a6..db912ccf 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_1 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3583    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3483    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp
index 18938aa2..8052ed92 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply( const P2Function< real_
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply( const P2Function< real_
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix( const std::shared_pt
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix( const std::shared_pt
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperator
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperator
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp
index 49b5755f..ce9ba70f 100644
--- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_2 : public Operator< P2Function<
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3038    4397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                    real_t* RESTRICT _data_dstVertex,
+                                                                    real_t* RESTRICT _data_muEdge,
+                                                                    real_t* RESTRICT _data_muVertex,
+                                                                    real_t* RESTRICT _data_srcEdge,
+                                                                    real_t* RESTRICT _data_srcVertex,
+                                                                    real_t           forVertex_0,
+                                                                    real_t           forVertex_1,
+                                                                    real_t           forVertex_2,
+                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                    real_t           micro_edges_per_macro_edge_float,
+                                                                    real_t           radRayVertex,
+                                                                    real_t           radRefVertex,
+                                                                    real_t           rayVertex_0,
+                                                                    real_t           rayVertex_1,
+                                                                    real_t           rayVertex_2,
+                                                                    real_t           refVertex_0,
+                                                                    real_t           refVertex_1,
+                                                                    real_t           refVertex_2,
+                                                                    real_t           thrVertex_0,
+                                                                    real_t           thrVertex_1,
+                                                                    real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2938    4297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                       idx_t* RESTRICT  _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       idx_t* RESTRICT  _data_srcEdge,
+                                                                       idx_t* RESTRICT  _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                                       int64_t micro_edges_per_macro_edge,
+                                                                       real_t  micro_edges_per_macro_edge_float,
+                                                                       real_t  radRayVertex,
+                                                                       real_t  radRefVertex,
+                                                                       real_t  rayVertex_0,
+                                                                       real_t  rayVertex_1,
+                                                                       real_t  rayVertex_2,
+                                                                       real_t  refVertex_0,
+                                                                       real_t  refVertex_1,
+                                                                       real_t  refVertex_2,
+                                                                       real_t  thrVertex_0,
+                                                                       real_t  thrVertex_1,
+                                                                       real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2048    3397      66      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp
index a9ec5f85..0fdca154 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilon_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilon_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -440,7 +444,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -460,6 +464,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -504,7 +509,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -518,6 +523,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp
index 0c72a0ab..2c51d0da 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -88,142 +90,171 @@ class P2ElementwiseEpsilon_0_0 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    146     144      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    449     436      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    110     108      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    349     336      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     71      63      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                real_t* RESTRICT _data_muEdge,
+                                                                                real_t* RESTRICT _data_muVertex,
+                                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                                int64_t          micro_edges_per_macro_edge,
+                                                                                real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    179     156      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                real_t* RESTRICT _data_muEdge,
+                                                                                real_t* RESTRICT _data_muVertex,
+                                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                                real_t           macro_vertex_coord_id_0comp2,
+                                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                                real_t           macro_vertex_coord_id_1comp2,
+                                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                                real_t           macro_vertex_coord_id_2comp2,
+                                                                                real_t           macro_vertex_coord_id_3comp0,
+                                                                                real_t           macro_vertex_coord_id_3comp1,
+                                                                                real_t           macro_vertex_coord_id_3comp2,
+                                                                                int64_t          micro_edges_per_macro_edge,
+                                                                                real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp
index 96e1003e..e1c16d43 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilon_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilon_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp
index a27df5e5..78c96a47 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,100 +84,119 @@ class P2ElementwiseEpsilon_0_1 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    191     189      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    155     153      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp
index 5a5ece8e..49e11fc2 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_0_2::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_0_2::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_0_2::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_0_2::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp
index 6d2a6616..ab6f49ec 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_0_2 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp
index 2381b5ed..666edf6b 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilon_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilon_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp
index 7ef99417..a7fdce7d 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,100 +84,119 @@ class P2ElementwiseEpsilon_1_0 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    191     189      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    155     153      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp
index 3abb5d43..37551a93 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseEpsilon_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseEpsilon_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -440,7 +444,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -460,6 +464,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -504,7 +509,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -518,6 +523,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp
index 13bcd9f6..db1ad8ea 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -88,142 +90,171 @@ class P2ElementwiseEpsilon_1_1 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    146     144      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    449     436      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    110     108      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    349     336      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     71      63      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                real_t* RESTRICT _data_muEdge,
+                                                                                real_t* RESTRICT _data_muVertex,
+                                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                                int64_t          micro_edges_per_macro_edge,
+                                                                                real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    179     156      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                real_t* RESTRICT _data_muEdge,
+                                                                                real_t* RESTRICT _data_muVertex,
+                                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                                real_t           macro_vertex_coord_id_0comp2,
+                                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                                real_t           macro_vertex_coord_id_1comp2,
+                                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                                real_t           macro_vertex_coord_id_2comp2,
+                                                                                real_t           macro_vertex_coord_id_3comp0,
+                                                                                real_t           macro_vertex_coord_id_3comp1,
+                                                                                real_t           macro_vertex_coord_id_3comp2,
+                                                                                int64_t          micro_edges_per_macro_edge,
+                                                                                real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp
index a82145ad..229162c8 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_1_2::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_1_2::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_1_2::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_1_2::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp
index 061fe534..cc82cde0 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_1_2 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp
index 88fcdb37..42e1e428 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_0::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_0::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp
index 7313eff9..bfb70f48 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_2_0 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp
index 2e531a7c..4f5d19bc 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_1::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_1::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp
index 9ab103ad..7487ce1a 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_2_1 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp
index 257b6228..74d1a338 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_2::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseEpsilon_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_2::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_2::toMatrix( const std::shared_ptr< SparseMatrixProx
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseEpsilon_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_2::toMatrix( const std::shared_ptr< SparseMatrixProx
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -321,7 +323,7 @@ void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -341,6 +343,7 @@ void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp
index c6677d45..444c1266 100644
--- a/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp
+++ b/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -88,83 +90,97 @@ class P2ElementwiseEpsilon_2_2 : public Operator< P2Function< real_t >, P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseEpsilon_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    449     436      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                 real_t* RESTRICT _data_dstVertex,
+                                                 real_t* RESTRICT _data_muEdge,
+                                                 real_t* RESTRICT _data_muVertex,
+                                                 real_t* RESTRICT _data_srcEdge,
+                                                 real_t* RESTRICT _data_srcVertex,
+                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                 int64_t          micro_edges_per_macro_edge,
+                                                 real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    349     336      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                    idx_t* RESTRICT                      _data_dstVertex,
+                                                    real_t* RESTRICT                     _data_muEdge,
+                                                    real_t* RESTRICT                     _data_muVertex,
+                                                    idx_t* RESTRICT                      _data_srcEdge,
+                                                    idx_t* RESTRICT                      _data_srcVertex,
+                                                    real_t                               macro_vertex_coord_id_0comp0,
+                                                    real_t                               macro_vertex_coord_id_0comp1,
+                                                    real_t                               macro_vertex_coord_id_0comp2,
+                                                    real_t                               macro_vertex_coord_id_1comp0,
+                                                    real_t                               macro_vertex_coord_id_1comp1,
+                                                    real_t                               macro_vertex_coord_id_1comp2,
+                                                    real_t                               macro_vertex_coord_id_2comp0,
+                                                    real_t                               macro_vertex_coord_id_2comp1,
+                                                    real_t                               macro_vertex_coord_id_2comp2,
+                                                    real_t                               macro_vertex_coord_id_3comp0,
+                                                    real_t                               macro_vertex_coord_id_3comp1,
+                                                    real_t                               macro_vertex_coord_id_3comp2,
+                                                    std::shared_ptr< SparseMatrixProxy > mat,
+                                                    int64_t                              micro_edges_per_macro_edge,
+                                                    real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseEpsilon_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    179     156      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                real_t* RESTRICT _data_muEdge,
+                                                                                real_t* RESTRICT _data_muVertex,
+                                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                                real_t           macro_vertex_coord_id_0comp2,
+                                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                                real_t           macro_vertex_coord_id_1comp2,
+                                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                                real_t           macro_vertex_coord_id_2comp2,
+                                                                                real_t           macro_vertex_coord_id_3comp0,
+                                                                                real_t           macro_vertex_coord_id_3comp1,
+                                                                                real_t           macro_vertex_coord_id_3comp2,
+                                                                                int64_t          micro_edges_per_macro_edge,
+                                                                                real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
index ce842b48..b47e68a3 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_0::apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
index ef8e4674..bbe9480e 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
index bd671a16..d9cd38d5 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_1::apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
index b84cbeab..59e329a7 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_0::apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
index 56b89e6e..3baa7447 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_1::apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
index b4709967..3a670082 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
index 48a9372c..396e50cb 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
index 43f779ee..491f16c8 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
index 9cd33c41..46604aa5 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
index 4c6a2797..d95e76e4 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
index 7b297955..d6e58a78 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
index 598674e8..689f4fe7 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
index 005e2796..3f3d7eb3 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
index bafa0faf..4ec48e9b 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
index 17d106ef..878c913b 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
index 66e3853b..a8829f8f 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
index 9c538b03..7d75a6ae 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
index 012da950..112c7d97 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
index 42313668..63d3593b 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
index a8bd0968..36f61a5e 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
index 32acd0de..9b7c8917 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
index 2313d32f..21a76a0d 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
index e991c681..bc7617f3 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
index 729b987d..8e91b3bc 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
index fdc396b2..71be2bfd 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_2::apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
index 228462fe..49b83fbe 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
index 64c043ff..3494ba7f 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
index a89339ae..6df91095 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
index 76f79e5e..ae126d2a 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
index d9e7eb50..b55ea264 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
index e6320861..11c5733e 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
index 13ddacc4..86cf0c46 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_2::apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
index 55827d3e..067d5ebd 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_0::apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
index 1da51cbd..872f66db 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_1::apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
index cfb3b76f..41c47dd7 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_2::apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
index af6887f0..bfbc9bdc 100644
--- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
index 635fcc9a..8859de5a 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_0::apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
index f624a381..7023d510 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
index 45eb4ac0..f88bffdc 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
index b0ba0ca1..b6cdec48 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_1::apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
index 6368a651..ef2f5199 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
index 39c9b043..d46b1131 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_0::apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
index f5a609f8..17e02897 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
similarity index 98%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
index b6cfddab..9ebafa72 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_1::apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
similarity index 98%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
index 50bb3db1..2a2c83dc 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
similarity index 98%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
index ae3d2188..4ff5f3ce 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
index 8d3f4915..d948db60 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
index 0429ae90..14a150f5 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
index 6e00075f..24630d2c 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
index 9c2f8b13..0d6cc152 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
index 9746d238..cf07447d 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
index 40e85e49..093daae6 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
index 2d1ae586..933f47e1 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
index a3c8f831..8ec79a9f 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
index 6ea180e2..35c9f7c5 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
index b8fa2f15..92ad7a47 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
index d6ed3aaa..a2803b26 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
index 913751dd..6b80cdad 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
index 1d04f1a4..f1756dbf 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
index 4a33acf8..d44e9560 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
index 19f5036b..59e82034 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
index 246bf4b2..4472c135 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
index 4839662b..3697fa5a 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
index ac3e7fe1..f6eaedfd 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
index f35664d0..81b2f338 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
index 995e8d7d..5c8cadef 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
index 48ccdc57..4a401e54 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
index a8a5ea94..e7f157ff 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
index 6a6c83f8..48e29e4a 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
index 29960423..03bb47dd 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
index 5a6d6a11..d5c47673 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp
index 196b0631..f5a61bc6 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp
index 75e2a24b..e380f4ef 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_0::toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
index b43c2532..5b6f7087 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
index 377700e3..674d6dbb 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp
index f112ea9a..5c9266e1 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_1::toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp
index 5df17aeb..79008336 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_1::toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
index 833d6080..178addd6 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_2::apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp
index c0a3819d..81d08aec 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_0_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_0_2::toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
index 626c2765..9c1b8a28 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
index ce752ff6..60cf4d51 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp
index e6b12c78..85f92d5e 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_0::toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp
index ff45b435..ec5f7bea 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_0::toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
index eedbea32..39298ace 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
index d449ae6a..026cd8dd 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
index 91d9e9ca..cea03d04 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
index 883735ab..0a111455 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp
index 79549f47..a58872ea 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp
index aff549f4..1b08642e 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_1::toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
index b3b27120..49e96a45 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_2::apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp
index 8772aa7b..09b18c98 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_1_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_1_2::toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
index b0dcd426..7f3b1f1f 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_0::apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp
index 97ade253..d66a4e3f 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_0::toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
index fd8f4f22..b0e72a02 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_1::apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp
index f8c35e99..ece95813 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_1::toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
index 443f2405..4058126a 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_2::apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
index f243bfa0..74288889 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp
similarity index 99%
rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp
rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp
index 30cc56a5..9801afb4 100644
--- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseEpsilon_2_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseEpsilon_2_2::toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/CMakeLists.txt b/operators/full_stokes/CMakeLists.txt
index cd9d5f1b..4e9d365a 100644
--- a/operators/full_stokes/CMakeLists.txt
+++ b/operators/full_stokes/CMakeLists.txt
@@ -49,108 +49,108 @@ add_library( opgen-full_stokes
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-full_stokes PRIVATE
 
-      avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
-      avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
+      avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -161,68 +161,68 @@ else()
 
    target_sources(opgen-full_stokes PRIVATE
 
-      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp
+      noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
+      noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp
index 72dfac44..58031f2c 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::apply( const P2Function< real_t >& s
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::apply( const P2Function< real_t >& s
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix( const std::shared_ptr< Spa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix( const std::shared_ptr< Spa
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -349,7 +351,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -371,6 +373,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp
index 579eb8f0..2117e368 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -99,89 +101,104 @@ class P2ElementwiseFullStokesAnnulusMap_0_0 : public Operator< P2Function< real_
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    888    1304      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                              real_t* RESTRICT _data_dstVertex,
+                                                              real_t* RESTRICT _data_muEdge,
+                                                              real_t* RESTRICT _data_muVertex,
+                                                              real_t* RESTRICT _data_srcEdge,
+                                                              real_t* RESTRICT _data_srcVertex,
+                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                              int64_t          micro_edges_per_macro_edge,
+                                                              real_t           micro_edges_per_macro_edge_float,
+                                                              real_t           radRayVertex,
+                                                              real_t           radRefVertex,
+                                                              real_t           rayVertex_0,
+                                                              real_t           rayVertex_1,
+                                                              real_t           refVertex_0,
+                                                              real_t           refVertex_1,
+                                                              real_t           thrVertex_0,
+                                                              real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    852    1268      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( idx_t* RESTRICT  _data_dstEdge,
+                                                                 idx_t* RESTRICT  _data_dstVertex,
+                                                                 real_t* RESTRICT _data_muEdge,
+                                                                 real_t* RESTRICT _data_muVertex,
+                                                                 idx_t* RESTRICT  _data_srcEdge,
+                                                                 idx_t* RESTRICT  _data_srcVertex,
+                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                                 int64_t                              micro_edges_per_macro_edge,
+                                                                 real_t micro_edges_per_macro_edge_float,
+                                                                 real_t radRayVertex,
+                                                                 real_t radRefVertex,
+                                                                 real_t rayVertex_0,
+                                                                 real_t rayVertex_1,
+                                                                 real_t refVertex_0,
+                                                                 real_t refVertex_1,
+                                                                 real_t thrVertex_0,
+                                                                 real_t thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    618     968      28      20      4              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp
index efbb3d8b..7127fc85 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::apply( const P2Function< real_t >& s
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::apply( const P2Function< real_t >& s
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix( const std::shared_ptr< Spa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix( const std::shared_ptr< Spa
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp
index 5c457c8d..6a380c6e 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,63 +95,72 @@ class P2ElementwiseFullStokesAnnulusMap_0_1 : public Operator< P2Function< real_
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1128    1452      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                              real_t* RESTRICT _data_dstVertex,
+                                                              real_t* RESTRICT _data_muEdge,
+                                                              real_t* RESTRICT _data_muVertex,
+                                                              real_t* RESTRICT _data_srcEdge,
+                                                              real_t* RESTRICT _data_srcVertex,
+                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                              int64_t          micro_edges_per_macro_edge,
+                                                              real_t           micro_edges_per_macro_edge_float,
+                                                              real_t           radRayVertex,
+                                                              real_t           radRefVertex,
+                                                              real_t           rayVertex_0,
+                                                              real_t           rayVertex_1,
+                                                              real_t           refVertex_0,
+                                                              real_t           refVertex_1,
+                                                              real_t           thrVertex_0,
+                                                              real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1092    1416      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( idx_t* RESTRICT  _data_dstEdge,
+                                                                 idx_t* RESTRICT  _data_dstVertex,
+                                                                 real_t* RESTRICT _data_muEdge,
+                                                                 real_t* RESTRICT _data_muVertex,
+                                                                 idx_t* RESTRICT  _data_srcEdge,
+                                                                 idx_t* RESTRICT  _data_srcVertex,
+                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                                 int64_t                              micro_edges_per_macro_edge,
+                                                                 real_t micro_edges_per_macro_edge_float,
+                                                                 real_t radRayVertex,
+                                                                 real_t radRefVertex,
+                                                                 real_t rayVertex_0,
+                                                                 real_t rayVertex_1,
+                                                                 real_t refVertex_0,
+                                                                 real_t refVertex_1,
+                                                                 real_t thrVertex_0,
+                                                                 real_t thrVertex_1 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp
index e701b20c..b618a343 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::apply( const P2Function< real_t >& s
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::apply( const P2Function< real_t >& s
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix( const std::shared_ptr< Spa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix( const std::shared_ptr< Spa
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp
index 299e3fe3..b6a47058 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,63 +95,72 @@ class P2ElementwiseFullStokesAnnulusMap_1_0 : public Operator< P2Function< real_
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1128    1456      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                              real_t* RESTRICT _data_dstVertex,
+                                                              real_t* RESTRICT _data_muEdge,
+                                                              real_t* RESTRICT _data_muVertex,
+                                                              real_t* RESTRICT _data_srcEdge,
+                                                              real_t* RESTRICT _data_srcVertex,
+                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                              int64_t          micro_edges_per_macro_edge,
+                                                              real_t           micro_edges_per_macro_edge_float,
+                                                              real_t           radRayVertex,
+                                                              real_t           radRefVertex,
+                                                              real_t           rayVertex_0,
+                                                              real_t           rayVertex_1,
+                                                              real_t           refVertex_0,
+                                                              real_t           refVertex_1,
+                                                              real_t           thrVertex_0,
+                                                              real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1092    1420      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( idx_t* RESTRICT  _data_dstEdge,
+                                                                 idx_t* RESTRICT  _data_dstVertex,
+                                                                 real_t* RESTRICT _data_muEdge,
+                                                                 real_t* RESTRICT _data_muVertex,
+                                                                 idx_t* RESTRICT  _data_srcEdge,
+                                                                 idx_t* RESTRICT  _data_srcVertex,
+                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                                 int64_t                              micro_edges_per_macro_edge,
+                                                                 real_t micro_edges_per_macro_edge_float,
+                                                                 real_t radRayVertex,
+                                                                 real_t radRefVertex,
+                                                                 real_t rayVertex_0,
+                                                                 real_t rayVertex_1,
+                                                                 real_t refVertex_0,
+                                                                 real_t refVertex_1,
+                                                                 real_t thrVertex_0,
+                                                                 real_t thrVertex_1 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp
index 663ef74a..99de0a35 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::apply( const P2Function< real_t >& s
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::apply( const P2Function< real_t >& s
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix( const std::shared_ptr< Spa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix( const std::shared_ptr< Spa
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -349,7 +351,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -371,6 +373,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp
index 61485efc..01025b15 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -99,89 +101,104 @@ class P2ElementwiseFullStokesAnnulusMap_1_1 : public Operator< P2Function< real_
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    888    1304      28      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                              real_t* RESTRICT _data_dstVertex,
+                                                              real_t* RESTRICT _data_muEdge,
+                                                              real_t* RESTRICT _data_muVertex,
+                                                              real_t* RESTRICT _data_srcEdge,
+                                                              real_t* RESTRICT _data_srcVertex,
+                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                              int64_t          micro_edges_per_macro_edge,
+                                                              real_t           micro_edges_per_macro_edge_float,
+                                                              real_t           radRayVertex,
+                                                              real_t           radRefVertex,
+                                                              real_t           rayVertex_0,
+                                                              real_t           rayVertex_1,
+                                                              real_t           refVertex_0,
+                                                              real_t           refVertex_1,
+                                                              real_t           thrVertex_0,
+                                                              real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    852    1268      28      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( idx_t* RESTRICT  _data_dstEdge,
+                                                                 idx_t* RESTRICT  _data_dstVertex,
+                                                                 real_t* RESTRICT _data_muEdge,
+                                                                 real_t* RESTRICT _data_muVertex,
+                                                                 idx_t* RESTRICT  _data_srcEdge,
+                                                                 idx_t* RESTRICT  _data_srcVertex,
+                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                                 int64_t                              micro_edges_per_macro_edge,
+                                                                 real_t micro_edges_per_macro_edge_float,
+                                                                 real_t radRayVertex,
+                                                                 real_t radRefVertex,
+                                                                 real_t rayVertex_0,
+                                                                 real_t rayVertex_1,
+                                                                 real_t refVertex_0,
+                                                                 real_t refVertex_1,
+                                                                 real_t thrVertex_0,
+                                                                 real_t thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    618     968      28      20      4              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp
index 48536c31..2ced3fd9 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOpera
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOpera
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp
index df0a75c6..a32d7e1c 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_0 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3513    5177      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3413    5077      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2298    3592      66      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp
index da66b821..3d449445 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp
index 7b524949..07133b03 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_1 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4283    5397      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4183    5297      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp
index 3f5efa0b..ff66e6ea 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp
index 3f5c1ae6..5e2cfe06 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_2 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4283    5392      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4183    5292      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp
index 8f1aebff..2b4f9a3a 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp
index c0c06628..283daa25 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_0 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4283    5387      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4183    5287      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp
index 65de5632..fb682439 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOpera
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOpera
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp
index b07be550..69319c20 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_1 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3513    5187      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3413    5087      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2298    3602      66      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp
index 93197932..89f9ae24 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp
index 12b40740..3949dc64 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_2 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4283    5392      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4183    5292      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp
index 9b9e077c..2b12e9a4 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp
index e7f89bd7..66b9c464 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_0 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4283    5387      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4183    5287      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp
index 33789ed1..b8c0e5e9 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp
index d758b382..d584d34c 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_1 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4283    5387      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   4183    5287      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp
index 1ed1ee4b..f3b67bf0 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp
@@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply( const P2Function< re
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply( const P2Function< re
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix( const std::shared
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix( const std::shared
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOpera
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOpera
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp
index 56e9fc5b..0288c626 100644
--- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_2 : public Operator< P2Functi
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3513    5182      66      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                       real_t* RESTRICT _data_dstVertex,
+                                                                       real_t* RESTRICT _data_muEdge,
+                                                                       real_t* RESTRICT _data_muVertex,
+                                                                       real_t* RESTRICT _data_srcEdge,
+                                                                       real_t* RESTRICT _data_srcVertex,
+                                                                       real_t           forVertex_0,
+                                                                       real_t           forVertex_1,
+                                                                       real_t           forVertex_2,
+                                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                                       real_t           macro_vertex_coord_id_0comp2,
+                                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                                       real_t           macro_vertex_coord_id_1comp2,
+                                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                                       real_t           macro_vertex_coord_id_2comp2,
+                                                                       real_t           macro_vertex_coord_id_3comp0,
+                                                                       real_t           macro_vertex_coord_id_3comp1,
+                                                                       real_t           macro_vertex_coord_id_3comp2,
+                                                                       int64_t          micro_edges_per_macro_edge,
+                                                                       real_t           micro_edges_per_macro_edge_float,
+                                                                       real_t           radRayVertex,
+                                                                       real_t           radRefVertex,
+                                                                       real_t           rayVertex_0,
+                                                                       real_t           rayVertex_1,
+                                                                       real_t           rayVertex_2,
+                                                                       real_t           refVertex_0,
+                                                                       real_t           refVertex_1,
+                                                                       real_t           refVertex_2,
+                                                                       real_t           thrVertex_0,
+                                                                       real_t           thrVertex_1,
+                                                                       real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   3413    5082      66      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                          idx_t* RESTRICT  _data_dstVertex,
+                                                                          real_t* RESTRICT _data_muEdge,
+                                                                          real_t* RESTRICT _data_muVertex,
+                                                                          idx_t* RESTRICT  _data_srcEdge,
+                                                                          idx_t* RESTRICT  _data_srcVertex,
+                                                                          real_t           forVertex_0,
+                                                                          real_t           forVertex_1,
+                                                                          real_t           forVertex_2,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          std::shared_ptr< SparseMatrixProxy > mat,
+                                                                          int64_t micro_edges_per_macro_edge,
+                                                                          real_t  micro_edges_per_macro_edge_float,
+                                                                          real_t  radRayVertex,
+                                                                          real_t  radRefVertex,
+                                                                          real_t  rayVertex_0,
+                                                                          real_t  rayVertex_1,
+                                                                          real_t  rayVertex_2,
+                                                                          real_t  refVertex_0,
+                                                                          real_t  refVertex_1,
+                                                                          real_t  refVertex_2,
+                                                                          real_t  thrVertex_0,
+                                                                          real_t  thrVertex_1,
+                                                                          real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2298    3597      66      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp
index e1c98782..928dd7c8 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokes_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokes_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -440,7 +444,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -460,6 +464,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -504,7 +509,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -518,6 +523,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp
index 63e4e09a..df1b1b4f 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -98,142 +100,173 @@ class P2ElementwiseFullStokes_0_0 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    146     144      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    449     436      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    110     108      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    349     336      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     71      63      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void
+       computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                  real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                  real_t* RESTRICT _data_muEdge,
+                                                                                  real_t* RESTRICT _data_muVertex,
+                                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                                  real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    179     156      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void
+       computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                  real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                  real_t* RESTRICT _data_muEdge,
+                                                                                  real_t* RESTRICT _data_muVertex,
+                                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                                  real_t           macro_vertex_coord_id_0comp2,
+                                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                                  real_t           macro_vertex_coord_id_1comp2,
+                                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                                  real_t           macro_vertex_coord_id_2comp2,
+                                                                                  real_t           macro_vertex_coord_id_3comp0,
+                                                                                  real_t           macro_vertex_coord_id_3comp1,
+                                                                                  real_t           macro_vertex_coord_id_3comp2,
+                                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                                  real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp
index a9cc1381..8d55c414 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokes_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_0_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokes_0_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp
index b626c4d7..2383334d 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -92,100 +94,119 @@ class P2ElementwiseFullStokes_0_1 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    191     189      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    155     153      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp
index ddfb13ea..9f8044a7 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_0_2::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_0_2::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_0_2::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_0_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_0_2::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp
index 37c3f570..bcd3a4ac 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_0_2 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_0_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp
index 951259bc..044e64af 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokes_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokes_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp
index 318f05d4..20104068 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -92,100 +94,119 @@ class P2ElementwiseFullStokes_1_0 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    191     189      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    155     153      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp
index 7b031440..a2c8882c 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseFullStokes_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_1_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseFullStokes_1_1_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -440,7 +444,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -460,6 +464,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -504,7 +509,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -518,6 +523,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp
index 44988bf2..820ab0cc 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -98,142 +100,173 @@ class P2ElementwiseFullStokes_1_1 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    146     144      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    449     436      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    110     108      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    349     336      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_1
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     71      63      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void
+       computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                  real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                  real_t* RESTRICT _data_muEdge,
+                                                                                  real_t* RESTRICT _data_muVertex,
+                                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                                  real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    179     156      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void
+       computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                  real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                  real_t* RESTRICT _data_muEdge,
+                                                                                  real_t* RESTRICT _data_muVertex,
+                                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                                  real_t           macro_vertex_coord_id_0comp2,
+                                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                                  real_t           macro_vertex_coord_id_1comp2,
+                                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                                  real_t           macro_vertex_coord_id_2comp2,
+                                                                                  real_t           macro_vertex_coord_id_3comp0,
+                                                                                  real_t           macro_vertex_coord_id_3comp1,
+                                                                                  real_t           macro_vertex_coord_id_3comp2,
+                                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                                  real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp
index bd7685ec..b2551208 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_1_2::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_1_2::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_1_2::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_1_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_1_2::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp
index 84e25265..e92d8145 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_1_2 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_1_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp
index b61da148..7bbbb100 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_0::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_0::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_0::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_0::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp
index 2eaff7a7..f0fac0c0 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_2_0 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp
index 70063545..dba8cdb8 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_1::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_1::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_1::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_2_1_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_1::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp
index 6903a7f9..5cb09896 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp
@@ -29,12 +29,14 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_2_1 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    629     616      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_2_1
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    529     516      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > mu;
 };
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp
index 4d0715c1..1a7be77d 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp
@@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_2::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseFullStokes_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_2::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_2::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseFullStokes_2_2_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_2::toMatrix( const std::shared_ptr< SparseMatrixP
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -321,7 +323,7 @@ void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -341,6 +343,7 @@ void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp
index 521c219f..015385dc 100644
--- a/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp
+++ b/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -98,83 +100,98 @@ class P2ElementwiseFullStokes_2_2 : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseFullStokes_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    449     436      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_muEdge,
+                                                    real_t* RESTRICT _data_muVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_0comp2,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_1comp2,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    real_t           macro_vertex_coord_id_2comp2,
+                                                    real_t           macro_vertex_coord_id_3comp0,
+                                                    real_t           macro_vertex_coord_id_3comp1,
+                                                    real_t           macro_vertex_coord_id_3comp2,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    349     336      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       real_t* RESTRICT                     _data_muEdge,
+                                                       real_t* RESTRICT                     _data_muVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_0comp2,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp2,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp2,
+                                                       real_t                               macro_vertex_coord_id_3comp0,
+                                                       real_t                               macro_vertex_coord_id_3comp1,
+                                                       real_t                               macro_vertex_coord_id_3comp2,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseFullStokes_2_2
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    179     156      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void
+       computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                  real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                  real_t* RESTRICT _data_muEdge,
+                                                                                  real_t* RESTRICT _data_muVertex,
+                                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                                  real_t           macro_vertex_coord_id_0comp2,
+                                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                                  real_t           macro_vertex_coord_id_1comp2,
+                                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                                  real_t           macro_vertex_coord_id_2comp2,
+                                                                                  real_t           macro_vertex_coord_id_3comp0,
+                                                                                  real_t           macro_vertex_coord_id_3comp1,
+                                                                                  real_t           macro_vertex_coord_id_3comp2,
+                                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                                  real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
index 39f53c13..4dbc0bc1 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_0::apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
index 1460316d..bb6db6bc 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
index 3b856520..9018004f 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_1::apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
index 2ef8ff10..5681bd3f 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_0::apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
index 6211a35b..4efc8a1d 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_1::apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
index 87383033..6240b9a1 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
index d1d11acf..bd795ece 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
index a16c5329..340f7d64 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
index 8a7cf2ab..8f0f8014 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
index 8baecf92..46887de2 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
index 3992a8cd..8811dc99 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
index f8a51d81..394fcd42 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
index 4f6e4f54..9527aac2 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
index 4a7346e9..3db3326f 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
index 98e38c67..b4f9412f 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
index 77dfb79c..401e9caf 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
index bca7d6ac..34bba0e9 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
index fee58ccd..cc95c034 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
index 6f9d6544..100047eb 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
index 7935e1ec..37a69d18 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
index 7533c51a..7f40a662 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
index 36278a1b..26ae96be 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
index 477da792..146c9577 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
index 83689cd7..f9f95d88 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
index f91882dd..c4fda7ce 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_2::apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
index 197bdd29..b51195bb 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
index 1a4835c0..f4aa4d2b 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
index 921bacac..4af8f031 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
index ca6d5eb5..91b223eb 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
index 6e71a669..8d9b3df2 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
index 7931c82b..729b593c 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
index a0835f60..b6c06168 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_2::apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
index 2f099c32..de5791d1 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_0::apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
index 2c3c6d45..3d8e9bfb 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_1::apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
index 7d065898..aa9338f2 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_2::apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
index 926febf0..10a45775 100644
--- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
index fcfdb454..9b6ea9f3 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_0::apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
index 9eb6d67f..ec6820a3 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
index 3f9a5785..ad06d887 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
index 66828422..e0d4c035 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_1::apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
index 14582315..f942f57d 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
index 9b0d1793..cad742bd 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_0::apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
index d0bebd24..1e9c19f6 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
index 979ed833..6d453b80 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_1::apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
similarity index 98%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
index 76943090..f2039f44 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
index 2a2aa192..21eb2177 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
index 81f22134..c8a4bbe1 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
index f3df1c09..acccfd6f 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
index a9d9a1f5..d5ad8eb1 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
index f044c0ae..04de9f72 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
index 70b5b051..3f23db97 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
index 1e0cc9b7..b19089c4 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
index b5b1070e..aa32fff8 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
index 333fd1e8..74b56c62 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
index d72855f6..a202968d 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
index a57a3fd2..772db896 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
index bb0882fa..662bd4a7 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
index 434add40..b8eb8580 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
index 8b23ea41..648491d4 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
index fd776b0a..2cef19c3 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
index 2434f82c..b92b8c5b 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
index 2e9034d4..9cbc1b0c 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
index 5755a5ae..3d4c2235 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
index 2eb5019f..8ef485bc 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
index e84963ef..b9cd02ce 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
index 520de317..65a69f3e 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
index 86ee979c..3946b762 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
index 6b4d7d52..61bacabf 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
index 1aff6791..4a0288c3 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
index c9ff205f..06a99e0d 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
index 32cc6f0d..449bf148 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp
index a4527e17..95ab9e88 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp
index 89a2608e..79fc6e1b 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_0::toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
index 60d01a6e..d7f1666e 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
index 270ad16c..bfdfc243 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp
index 9c47d963..d01b881d 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_1::toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp
index 03de4a31..f787ed05 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_1::toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
index 119aef26..e6eb52a2 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_2::apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp
index 4db54ae7..9b37c459 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_0_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_0_2::toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
index b592dcf8..7ac7272a 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
index c41d6827..292a8079 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp
index 3a614c4d..3f8e5d1c 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_0::toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp
index 8fe55dc1..67f69136 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_0::toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
index 09aaf0e8..18733b28 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
index 17f5451f..c53209c5 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
index 8bde675a..7ab2a66b 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
index d7f4d343..5083d902 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp
index 350f2fee..e91096a4 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp
index 99e17bce..461c648c 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_1::toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
index 1a178923..ccf6d3d2 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_2::apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp
index e8dc95d6..c7a48f23 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_1_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_1_2::toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
index bc456082..1f1340c8 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_0::apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp
index f3ef7329..8ae974b7 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_0::toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
index 7246ec9f..bda6c1e4 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_1::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_1::apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp
index 0f363c91..8626314a 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp
@@ -41,6 +41,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -54,7 +58,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_1::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_1::toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
index af6ddf52..06c0ca22 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_2::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_2::apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
index 927aa9d5..19d3c294 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp
similarity index 99%
rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp
rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp
index b1323923..84eeba13 100644
--- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseFullStokes_2_2::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseFullStokes_2_2::toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/grad_rho_by_rho_dot_u/CMakeLists.txt b/operators/grad_rho_by_rho_dot_u/CMakeLists.txt
index 7660ec90..dcb22f8a 100644
--- a/operators/grad_rho_by_rho_dot_u/CMakeLists.txt
+++ b/operators/grad_rho_by_rho_dot_u/CMakeLists.txt
@@ -8,17 +8,45 @@ add_library( opgen-grad_rho_by_rho_dot_u
    P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp
 )
 
-target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE
-
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp
-   noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp
-)
+if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
+   target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE
+
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
+   )
+
+   set_source_files_properties(
+
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
+      avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
+
+      PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
+   )
+else()
+   if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY)
+      message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.")
+   endif()
+
+   target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE
+
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
+      noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
+   )
+endif()
 
 if (HYTEG_BUILD_WITH_PETSC)
    target_link_libraries(opgen-grad_rho_by_rho_dot_u PUBLIC PETSc::PETSc)
diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp
index 43cad945..130fed36 100644
--- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp
+++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp
@@ -147,7 +147,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D(
 
              _data_dst,
              _data_rhoEdge,
@@ -172,6 +172,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -225,7 +226,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D(
 
              _data_dst,
              _data_rhoEdge,
@@ -242,6 +243,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -312,7 +314,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D(
 
              _data_dst,
              _data_rhoEdge,
@@ -338,6 +340,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -372,7 +375,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D(
 
              _data_dst,
              _data_rhoEdge,
@@ -390,6 +393,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp
index 6328e35f..1b858953 100644
--- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp
+++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2VectorFunction.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -50,7 +52,7 @@ namespace operatorgeneration {
 ///
 /// Weak formulation
 ///
-///     u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2))
+///     u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2))
 ///     v: test function  (space: Lagrange, degree: 1)
 ///     rho: coefficient    (space: Lagrange, degree: 2)
 ///
@@ -78,108 +80,127 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotU : public Operator< P2VectorFunctio
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///    356     380      16       0      0              0                 0              0
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_rhoEdge,
-                        real_t* RESTRICT _data_rhoVertex,
-                        real_t* RESTRICT _data_src_edge_0,
-                        real_t* RESTRICT _data_src_edge_1,
-                        real_t* RESTRICT _data_src_vertex_0,
-                        real_t* RESTRICT _data_src_vertex_1,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   ///    356     384      16       0      0              0                 0              1
+   void apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t* RESTRICT _data_dst,
+                                                                real_t* RESTRICT _data_rhoEdge,
+                                                                real_t* RESTRICT _data_rhoVertex,
+                                                                real_t* RESTRICT _data_src_edge_0,
+                                                                real_t* RESTRICT _data_src_edge_1,
+                                                                real_t* RESTRICT _data_src_vertex_0,
+                                                                real_t* RESTRICT _data_src_vertex_1,
+                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                int64_t          micro_edges_per_macro_edge,
+                                                                real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///   1153    1162      41       0      0              0                 0              0
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_rhoEdge,
-                        real_t* RESTRICT _data_rhoVertex,
-                        real_t* RESTRICT _data_src_edge_0,
-                        real_t* RESTRICT _data_src_edge_1,
-                        real_t* RESTRICT _data_src_edge_2,
-                        real_t* RESTRICT _data_src_vertex_0,
-                        real_t* RESTRICT _data_src_vertex_1,
-                        real_t* RESTRICT _data_src_vertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   ///   1153    1167      41       0      0              0                 0              1
+   void apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t* RESTRICT _data_dst,
+                                                                real_t* RESTRICT _data_rhoEdge,
+                                                                real_t* RESTRICT _data_rhoVertex,
+                                                                real_t* RESTRICT _data_src_edge_0,
+                                                                real_t* RESTRICT _data_src_edge_1,
+                                                                real_t* RESTRICT _data_src_edge_2,
+                                                                real_t* RESTRICT _data_src_vertex_0,
+                                                                real_t* RESTRICT _data_src_vertex_1,
+                                                                real_t* RESTRICT _data_src_vertex_2,
+                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                real_t           macro_vertex_coord_id_0comp2,
+                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                real_t           macro_vertex_coord_id_1comp2,
+                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                real_t           macro_vertex_coord_id_2comp2,
+                                                                real_t           macro_vertex_coord_id_3comp0,
+                                                                real_t           macro_vertex_coord_id_3comp1,
+                                                                real_t           macro_vertex_coord_id_3comp2,
+                                                                int64_t          micro_edges_per_macro_edge,
+                                                                real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///    320     344      16       0      0              0                 0              3
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_rhoEdge,
-                           real_t* RESTRICT                     _data_rhoVertex,
-                           idx_t* RESTRICT                      _data_src_edge_0,
-                           idx_t* RESTRICT                      _data_src_edge_1,
-                           idx_t* RESTRICT                      _data_src_vertex_0,
-                           idx_t* RESTRICT                      _data_src_vertex_1,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   ///    320     348      16       0      0              0                 0              4
+   void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( idx_t* RESTRICT  _data_dst,
+                                                                   real_t* RESTRICT _data_rhoEdge,
+                                                                   real_t* RESTRICT _data_rhoVertex,
+                                                                   idx_t* RESTRICT  _data_src_edge_0,
+                                                                   idx_t* RESTRICT  _data_src_edge_1,
+                                                                   idx_t* RESTRICT  _data_src_vertex_0,
+                                                                   idx_t* RESTRICT  _data_src_vertex_1,
+                                                                   real_t           macro_vertex_coord_id_0comp0,
+                                                                   real_t           macro_vertex_coord_id_0comp1,
+                                                                   real_t           macro_vertex_coord_id_1comp0,
+                                                                   real_t           macro_vertex_coord_id_1comp1,
+                                                                   real_t           macro_vertex_coord_id_2comp0,
+                                                                   real_t           macro_vertex_coord_id_2comp1,
+                                                                   std::shared_ptr< SparseMatrixProxy > mat,
+                                                                   int64_t micro_edges_per_macro_edge,
+                                                                   real_t  micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///   1033    1042      41       0      0              0                 0              3
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_rhoEdge,
-                           real_t* RESTRICT                     _data_rhoVertex,
-                           idx_t* RESTRICT                      _data_src_edge_0,
-                           idx_t* RESTRICT                      _data_src_edge_1,
-                           idx_t* RESTRICT                      _data_src_edge_2,
-                           idx_t* RESTRICT                      _data_src_vertex_0,
-                           idx_t* RESTRICT                      _data_src_vertex_1,
-                           idx_t* RESTRICT                      _data_src_vertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   ///   1033    1047      41       0      0              0                 0              4
+   void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( idx_t* RESTRICT  _data_dst,
+                                                                   real_t* RESTRICT _data_rhoEdge,
+                                                                   real_t* RESTRICT _data_rhoVertex,
+                                                                   idx_t* RESTRICT  _data_src_edge_0,
+                                                                   idx_t* RESTRICT  _data_src_edge_1,
+                                                                   idx_t* RESTRICT  _data_src_edge_2,
+                                                                   idx_t* RESTRICT  _data_src_vertex_0,
+                                                                   idx_t* RESTRICT  _data_src_vertex_1,
+                                                                   idx_t* RESTRICT  _data_src_vertex_2,
+                                                                   real_t           macro_vertex_coord_id_0comp0,
+                                                                   real_t           macro_vertex_coord_id_0comp1,
+                                                                   real_t           macro_vertex_coord_id_0comp2,
+                                                                   real_t           macro_vertex_coord_id_1comp0,
+                                                                   real_t           macro_vertex_coord_id_1comp1,
+                                                                   real_t           macro_vertex_coord_id_1comp2,
+                                                                   real_t           macro_vertex_coord_id_2comp0,
+                                                                   real_t           macro_vertex_coord_id_2comp1,
+                                                                   real_t           macro_vertex_coord_id_2comp2,
+                                                                   real_t           macro_vertex_coord_id_3comp0,
+                                                                   real_t           macro_vertex_coord_id_3comp1,
+                                                                   real_t           macro_vertex_coord_id_3comp2,
+                                                                   std::shared_ptr< SparseMatrixProxy > mat,
+                                                                   int64_t micro_edges_per_macro_edge,
+                                                                   real_t  micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > rho;
 };
diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp
index 25714200..88271405 100644
--- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp
+++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp
@@ -141,7 +141,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply( const P2VectorFun
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D(
 
              _data_dst,
              _data_rhoEdge,
@@ -166,6 +166,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply( const P2VectorFun
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -247,7 +248,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix( const std::sha
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D(
 
              _data_dst,
              _data_rhoEdge,
@@ -273,6 +274,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix( const std::sha
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp
index e94fb265..64fadf55 100644
--- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp
+++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -38,6 +39,7 @@
 #include "hyteg/p2functionspace/P2VectorFunction.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -51,7 +53,7 @@ namespace operatorgeneration {
 ///
 /// Weak formulation
 ///
-///     u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2))
+///     u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2))
 ///     v: test function  (space: Lagrange, degree: 1)
 ///     rho: coefficient    (space: Lagrange, degree: 2)
 ///
@@ -79,65 +81,74 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap : public Operator< P2Vec
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///    560     732      24      12      0              0                 0              0
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_rhoEdge,
-                        real_t* RESTRICT _data_rhoVertex,
-                        real_t* RESTRICT _data_src_edge_0,
-                        real_t* RESTRICT _data_src_edge_1,
-                        real_t* RESTRICT _data_src_vertex_0,
-                        real_t* RESTRICT _data_src_vertex_1,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   ///    560     740      24      12      0              0                 0              1
+   void apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t* RESTRICT _data_dst,
+                                                                          real_t* RESTRICT _data_rhoEdge,
+                                                                          real_t* RESTRICT _data_rhoVertex,
+                                                                          real_t* RESTRICT _data_src_edge_0,
+                                                                          real_t* RESTRICT _data_src_edge_1,
+                                                                          real_t* RESTRICT _data_src_vertex_0,
+                                                                          real_t* RESTRICT _data_src_vertex_1,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          int64_t          micro_edges_per_macro_edge,
+                                                                          real_t           micro_edges_per_macro_edge_float,
+                                                                          real_t           radRayVertex,
+                                                                          real_t           radRefVertex,
+                                                                          real_t           rayVertex_0,
+                                                                          real_t           rayVertex_1,
+                                                                          real_t           refVertex_0,
+                                                                          real_t           refVertex_1,
+                                                                          real_t           thrVertex_0,
+                                                                          real_t           thrVertex_1 ) const;
+
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///    524     696      24      12      0              0                 0              3
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_rhoEdge,
-                           real_t* RESTRICT                     _data_rhoVertex,
-                           idx_t* RESTRICT                      _data_src_edge_0,
-                           idx_t* RESTRICT                      _data_src_edge_1,
-                           idx_t* RESTRICT                      _data_src_vertex_0,
-                           idx_t* RESTRICT                      _data_src_vertex_1,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   ///    524     704      24      12      0              0                 0              4
+   void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( idx_t* RESTRICT  _data_dst,
+                                                                             real_t* RESTRICT _data_rhoEdge,
+                                                                             real_t* RESTRICT _data_rhoVertex,
+                                                                             idx_t* RESTRICT  _data_src_edge_0,
+                                                                             idx_t* RESTRICT  _data_src_edge_1,
+                                                                             idx_t* RESTRICT  _data_src_vertex_0,
+                                                                             idx_t* RESTRICT  _data_src_vertex_1,
+                                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                                             std::shared_ptr< SparseMatrixProxy > mat,
+                                                                             int64_t micro_edges_per_macro_edge,
+                                                                             real_t  micro_edges_per_macro_edge_float,
+                                                                             real_t  radRayVertex,
+                                                                             real_t  radRefVertex,
+                                                                             real_t  rayVertex_0,
+                                                                             real_t  rayVertex_1,
+                                                                             real_t  refVertex_0,
+                                                                             real_t  refVertex_1,
+                                                                             real_t  thrVertex_0,
+                                                                             real_t  thrVertex_1 ) const;
 
    P2Function< real_t > rho;
 };
diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp
index 2a68954c..7f7040c3 100644
--- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp
+++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp
@@ -163,7 +163,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply( const P2
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D(
 
              _data_dst,
              _data_rhoEdge,
@@ -202,6 +202,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply( const P2
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -294,7 +295,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix( const
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D(
 
              _data_dst,
              _data_rhoEdge,
@@ -334,6 +335,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix( const
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp
index 0166e62c..a209370a 100644
--- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp
+++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -38,6 +39,7 @@
 #include "hyteg/p2functionspace/P2VectorFunction.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -51,7 +53,7 @@ namespace operatorgeneration {
 ///
 /// Weak formulation
 ///
-///     u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2))
+///     u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2))
 ///     v: test function  (space: Lagrange, degree: 1)
 ///     rho: coefficient    (space: Lagrange, degree: 2)
 ///
@@ -80,93 +82,102 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///   1938    2537      51      10      0              0                 0              0
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_rhoEdge,
-                        real_t* RESTRICT _data_rhoVertex,
-                        real_t* RESTRICT _data_src_edge_0,
-                        real_t* RESTRICT _data_src_edge_1,
-                        real_t* RESTRICT _data_src_edge_2,
-                        real_t* RESTRICT _data_src_vertex_0,
-                        real_t* RESTRICT _data_src_vertex_1,
-                        real_t* RESTRICT _data_src_vertex_2,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   ///   1938    2547      51      10      0              0                 0              1
+   void apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst,
+                                                                                   real_t* RESTRICT _data_rhoEdge,
+                                                                                   real_t* RESTRICT _data_rhoVertex,
+                                                                                   real_t* RESTRICT _data_src_edge_0,
+                                                                                   real_t* RESTRICT _data_src_edge_1,
+                                                                                   real_t* RESTRICT _data_src_edge_2,
+                                                                                   real_t* RESTRICT _data_src_vertex_0,
+                                                                                   real_t* RESTRICT _data_src_vertex_1,
+                                                                                   real_t* RESTRICT _data_src_vertex_2,
+                                                                                   real_t           forVertex_0,
+                                                                                   real_t           forVertex_1,
+                                                                                   real_t           forVertex_2,
+                                                                                   real_t           macro_vertex_coord_id_0comp0,
+                                                                                   real_t           macro_vertex_coord_id_0comp1,
+                                                                                   real_t           macro_vertex_coord_id_0comp2,
+                                                                                   real_t           macro_vertex_coord_id_1comp0,
+                                                                                   real_t           macro_vertex_coord_id_1comp1,
+                                                                                   real_t           macro_vertex_coord_id_1comp2,
+                                                                                   real_t           macro_vertex_coord_id_2comp0,
+                                                                                   real_t           macro_vertex_coord_id_2comp1,
+                                                                                   real_t           macro_vertex_coord_id_2comp2,
+                                                                                   real_t           macro_vertex_coord_id_3comp0,
+                                                                                   real_t           macro_vertex_coord_id_3comp1,
+                                                                                   real_t           macro_vertex_coord_id_3comp2,
+                                                                                   int64_t          micro_edges_per_macro_edge,
+                                                                                   real_t micro_edges_per_macro_edge_float,
+                                                                                   real_t radRayVertex,
+                                                                                   real_t radRefVertex,
+                                                                                   real_t rayVertex_0,
+                                                                                   real_t rayVertex_1,
+                                                                                   real_t rayVertex_2,
+                                                                                   real_t refVertex_0,
+                                                                                   real_t refVertex_1,
+                                                                                   real_t refVertex_2,
+                                                                                   real_t thrVertex_0,
+                                                                                   real_t thrVertex_1,
+                                                                                   real_t thrVertex_2 ) const;
+
+   /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
-   ///   1818    2417      51      10      0              0                 0              3
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_rhoEdge,
-                           real_t* RESTRICT                     _data_rhoVertex,
-                           idx_t* RESTRICT                      _data_src_edge_0,
-                           idx_t* RESTRICT                      _data_src_edge_1,
-                           idx_t* RESTRICT                      _data_src_edge_2,
-                           idx_t* RESTRICT                      _data_src_vertex_0,
-                           idx_t* RESTRICT                      _data_src_vertex_1,
-                           idx_t* RESTRICT                      _data_src_vertex_2,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   ///   1818    2427      51      10      0              0                 0              4
+   void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( idx_t* RESTRICT  _data_dst,
+                                                                                      real_t* RESTRICT _data_rhoEdge,
+                                                                                      real_t* RESTRICT _data_rhoVertex,
+                                                                                      idx_t* RESTRICT  _data_src_edge_0,
+                                                                                      idx_t* RESTRICT  _data_src_edge_1,
+                                                                                      idx_t* RESTRICT  _data_src_edge_2,
+                                                                                      idx_t* RESTRICT  _data_src_vertex_0,
+                                                                                      idx_t* RESTRICT  _data_src_vertex_1,
+                                                                                      idx_t* RESTRICT  _data_src_vertex_2,
+                                                                                      real_t           forVertex_0,
+                                                                                      real_t           forVertex_1,
+                                                                                      real_t           forVertex_2,
+                                                                                      real_t macro_vertex_coord_id_0comp0,
+                                                                                      real_t macro_vertex_coord_id_0comp1,
+                                                                                      real_t macro_vertex_coord_id_0comp2,
+                                                                                      real_t macro_vertex_coord_id_1comp0,
+                                                                                      real_t macro_vertex_coord_id_1comp1,
+                                                                                      real_t macro_vertex_coord_id_1comp2,
+                                                                                      real_t macro_vertex_coord_id_2comp0,
+                                                                                      real_t macro_vertex_coord_id_2comp1,
+                                                                                      real_t macro_vertex_coord_id_2comp2,
+                                                                                      real_t macro_vertex_coord_id_3comp0,
+                                                                                      real_t macro_vertex_coord_id_3comp1,
+                                                                                      real_t macro_vertex_coord_id_3comp2,
+                                                                                      std::shared_ptr< SparseMatrixProxy > mat,
+                                                                                      int64_t micro_edges_per_macro_edge,
+                                                                                      real_t  micro_edges_per_macro_edge_float,
+                                                                                      real_t  radRayVertex,
+                                                                                      real_t  radRefVertex,
+                                                                                      real_t  rayVertex_0,
+                                                                                      real_t  rayVertex_1,
+                                                                                      real_t  rayVertex_2,
+                                                                                      real_t  refVertex_0,
+                                                                                      real_t  refVertex_1,
+                                                                                      real_t  refVertex_2,
+                                                                                      real_t  thrVertex_0,
+                                                                                      real_t  thrVertex_1,
+                                                                                      real_t  thrVertex_2 ) const;
 
    P2Function< real_t > rho;
 };
diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
new file mode 100644
index 00000000..1afdcebe
--- /dev/null
+++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
@@ -0,0 +1,1071 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_44)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45);
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_37)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_43),_mm256_mul_pd(rho_dof_1,tmp_qloop_49)),_mm256_mul_pd(rho_dof_2,tmp_qloop_50)),_mm256_mul_pd(rho_dof_3,tmp_qloop_38)),_mm256_mul_pd(rho_dof_4,tmp_qloop_51)),_mm256_mul_pd(rho_dof_5,tmp_qloop_53)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_41),tmp_qloop_54);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_59);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_61);
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_57);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_59);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_61);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_56);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_56);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_56);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_56);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_56);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_58);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_58);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_58);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_58);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_58);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_60);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_60);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_60);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_62);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_60);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_60);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_63);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_63);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_63);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_64);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_63);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_63);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_62);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_62);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_62);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_65);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_62);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_62);
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_64);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_64);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_64);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_65);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_64);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_64);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             { // Scalar tail of this row: starts at the largest multiple of 4 not exceeding (micro_edges_per_macro_edge - ctr_1) and processes one micro-element per iteration.
+            // ctr_0 - phantom_ctr_0 is always 0 in this loop, so only entry [0] of the two counter arrays below is ever read; entries [1..3] are written but unused here.
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            // p_affine_<v>_<c>: component c of vertex v of the current micro-triangle, i.e. macro vertex 0 plus (ctr_0, ctr_1) steps of 1/micro_edges_per_macro_edge_float along the two macro edges; vertex 1 takes ctr_0 + 1, vertex 2 takes ctr_1 + 1.
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // src_dof_0..5 are read from _data_src_vertex_0 / _data_src_edge_0, src_dof_6..11 from the matching *_1 arrays
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_9 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // rho_dof_0..2 come from _data_rhoVertex, rho_dof_3..5 from _data_rhoEdge, using the same index formulas as the src reads
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // q_acc_<r>_<c>: running quadrature sum for entry (r, c) of the 3x12 local matrix; row r feeds elMatVec_<r>, column c multiplies src_dof_<c>
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                { // Four quadrature points: point q has reference coordinates (_data_q_p_0[q], _data_q_p_1[q]) and weight _data_q_w[q].
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance from the origin of the affinely mapped quadrature point (tmp_qloop_0, tmp_qloop_3)
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); // 1 / sqrt(tmp_qloop_5)
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; // tmp_qloop_1, tmp_qloop_7..tmp_qloop_12, radRayVertex and rayVertex_* are not declared in this loop; they come from the enclosing scope
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; // tmp_qloop_43, _49, _50, _38, _51, _53 below evaluate the six quadratic Lagrange shape functions of the reference triangle at this quadrature point
+                   const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_40 = tmp_qloop_39*2.0;
+                   const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_42 = tmp_qloop_41*2.0;
+                   const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; // tmp_qloop_46 / tmp_qloop_47 below: derivative of sum_k rho_dof_k * shape_k with respect to the first / second reference coordinate
+                   const real_t tmp_qloop_45 = rho_dof_0*(tmp_qloop_37 + tmp_qloop_44 - 3.0);
+                   const real_t tmp_qloop_46 = rho_dof_1*(tmp_qloop_37 - 1.0) + rho_dof_3*tmp_qloop_44 - rho_dof_4*tmp_qloop_44 + rho_dof_5*(-tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_45;
+                   const real_t tmp_qloop_47 = rho_dof_2*(tmp_qloop_44 - 1.0) + rho_dof_3*tmp_qloop_37 + rho_dof_4*(-tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_37 + tmp_qloop_45;
+                   const real_t tmp_qloop_49 = tmp_qloop_40 - _data_q_p_0[q];
+                   const real_t tmp_qloop_50 = tmp_qloop_42 - _data_q_p_1[q];
+                   const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
+                   const real_t tmp_qloop_52 = tmp_qloop_39*4.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; // determinant of the 2x2 jac_blending matrix
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21; // NOTE(review): holds the signed determinant, no abs() is applied despite the name; presumably the sign is known to be positive, confirm against the generator
+                   const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
+                   const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); // tmp_qloop_54 = weight * affine det * blending det / (rho interpolated at q); _55, _59, _61 scale it by the linear shape functions 1-x-y, x, y
+                   const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
+                   const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; // jac_blending_inv_*: inverse of the 2x2 jac_blending (adjugate times 1/determinant)
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t tmp_qloop_48 = tmp_qloop_46*(jac_affine_inv_0_0_GRAY*jac_blending_inv_0_0 + jac_affine_inv_0_1_GRAY*jac_blending_inv_1_0) + tmp_qloop_47*(jac_affine_inv_1_0_GRAY*jac_blending_inv_0_0 + jac_affine_inv_1_1_GRAY*jac_blending_inv_1_0);
+                   const real_t tmp_qloop_56 = tmp_qloop_48*tmp_qloop_55; // tmp_qloop_48 / tmp_qloop_57 combine the rho derivatives with the affine and blending inverse Jacobians; presumably the two components of the physical rho gradient, confirm against the form definition
+                   const real_t tmp_qloop_60 = tmp_qloop_48*tmp_qloop_59;
+                   const real_t tmp_qloop_62 = tmp_qloop_48*tmp_qloop_61;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_57 = tmp_qloop_46*(jac_affine_inv_0_0_GRAY*jac_blending_inv_0_1 + jac_affine_inv_0_1_GRAY*jac_blending_inv_1_1) + tmp_qloop_47*(jac_affine_inv_1_0_GRAY*jac_blending_inv_0_1 + jac_affine_inv_1_1_GRAY*jac_blending_inv_1_1);
+                   const real_t tmp_qloop_58 = tmp_qloop_55*tmp_qloop_57;
+                   const real_t tmp_qloop_63 = tmp_qloop_57*tmp_qloop_59;
+                   const real_t tmp_qloop_64 = tmp_qloop_57*tmp_qloop_61;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; // NOTE(review): the eight hessian_blending_* values are computed here but not read by any q_tmp_* below
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_56; // q_tmp_<r>_<c>: this quadrature point's contribution to q_acc_<r>_<c>
+                   const real_t q_tmp_0_1 = tmp_qloop_49*tmp_qloop_56;
+                   const real_t q_tmp_0_2 = tmp_qloop_50*tmp_qloop_56;
+                   const real_t q_tmp_0_3 = tmp_qloop_38*tmp_qloop_56;
+                   const real_t q_tmp_0_4 = tmp_qloop_51*tmp_qloop_56;
+                   const real_t q_tmp_0_5 = tmp_qloop_53*tmp_qloop_56;
+                   const real_t q_tmp_0_6 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t q_tmp_0_7 = tmp_qloop_49*tmp_qloop_58;
+                   const real_t q_tmp_0_8 = tmp_qloop_50*tmp_qloop_58;
+                   const real_t q_tmp_0_9 = tmp_qloop_38*tmp_qloop_58;
+                   const real_t q_tmp_0_10 = tmp_qloop_51*tmp_qloop_58;
+                   const real_t q_tmp_0_11 = tmp_qloop_53*tmp_qloop_58;
+                   const real_t q_tmp_1_0 = tmp_qloop_43*tmp_qloop_60;
+                   const real_t q_tmp_1_1 = tmp_qloop_49*tmp_qloop_60;
+                   const real_t q_tmp_1_2 = tmp_qloop_50*tmp_qloop_60;
+                   const real_t q_tmp_1_3 = tmp_qloop_52*tmp_qloop_62; // algebraically equal to tmp_qloop_38*tmp_qloop_60 (4x^2 * y == 4xy * x), just factored differently
+                   const real_t q_tmp_1_4 = tmp_qloop_51*tmp_qloop_60;
+                   const real_t q_tmp_1_5 = tmp_qloop_53*tmp_qloop_60;
+                   const real_t q_tmp_1_6 = tmp_qloop_43*tmp_qloop_63;
+                   const real_t q_tmp_1_7 = tmp_qloop_49*tmp_qloop_63;
+                   const real_t q_tmp_1_8 = tmp_qloop_50*tmp_qloop_63;
+                   const real_t q_tmp_1_9 = tmp_qloop_52*tmp_qloop_64;
+                   const real_t q_tmp_1_10 = tmp_qloop_51*tmp_qloop_63;
+                   const real_t q_tmp_1_11 = tmp_qloop_53*tmp_qloop_63;
+                   const real_t q_tmp_2_0 = tmp_qloop_43*tmp_qloop_62;
+                   const real_t q_tmp_2_1 = tmp_qloop_49*tmp_qloop_62;
+                   const real_t q_tmp_2_2 = tmp_qloop_50*tmp_qloop_62;
+                   const real_t q_tmp_2_3 = tmp_qloop_48*tmp_qloop_65; // algebraically equal to tmp_qloop_38*tmp_qloop_62 (tmp_qloop_65 carries 4x * y^2 * tmp_qloop_54)
+                   const real_t q_tmp_2_4 = tmp_qloop_51*tmp_qloop_62;
+                   const real_t q_tmp_2_5 = tmp_qloop_53*tmp_qloop_62;
+                   const real_t q_tmp_2_6 = tmp_qloop_43*tmp_qloop_64;
+                   const real_t q_tmp_2_7 = tmp_qloop_49*tmp_qloop_64;
+                   const real_t q_tmp_2_8 = tmp_qloop_50*tmp_qloop_64;
+                   const real_t q_tmp_2_9 = tmp_qloop_57*tmp_qloop_65;
+                   const real_t q_tmp_2_10 = tmp_qloop_51*tmp_qloop_64;
+                   const real_t q_tmp_2_11 = tmp_qloop_53*tmp_qloop_64;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                } // end of quadrature loop; what follows multiplies the 3x12 local matrix by src_dof_0..11 and adds the three results into _data_dst at the same indices used for the vertex reads
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // one micro-edge step as a fraction of a macro edge: 1 / micro_edges_per_macro_edge_float
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // macro vertex 0 moved one step towards macro vertex 1 (component 0)
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // same, component 1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // one step along the edge from macro vertex 0 to macro vertex 2 (component 0)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // same, component 1
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // p_affine_const_<v>_<c>_BLUE: component c of vertex v of the BLUE micro-triangle with both loop counters at 0
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // jac_affine_<r>_<c>_BLUE: component r of (vertex c+1 minus vertex 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 affine Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant; no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // jac_affine_inv_*_BLUE: inverse of the 2x2 Jacobian (adjugate times reciprocal determinant)
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t; this needs a floating-point overload in scope (the C int abs would truncate), confirm the includes
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12));
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4);
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3);
+                   const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_44)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45);
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_37)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37);
+                   const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4));
+                   const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21);
+                   const __m256d abs_det_jac_blending = tmp_qloop_21;
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_43),_mm256_mul_pd(rho_dof_1,tmp_qloop_49)),_mm256_mul_pd(rho_dof_2,tmp_qloop_50)),_mm256_mul_pd(rho_dof_3,tmp_qloop_38)),_mm256_mul_pd(rho_dof_4,tmp_qloop_51)),_mm256_mul_pd(rho_dof_5,tmp_qloop_53)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_41),tmp_qloop_54);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22);
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_59);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_61);
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22);
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_57);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_59);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_61);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28);
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36);
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36);
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_56);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_56);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_56);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_56);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_56);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_58);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_58);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_58);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_58);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_58);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_60);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_60);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_60);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_62);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_60);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_60);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_63);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_63);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_63);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_64);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_63);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_63);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_62);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_62);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_62);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_65);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_62);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_62);
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_64);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_64);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_64);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_65);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_64);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_64);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_9 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
+                   const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
+                   const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
+                   const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
+                   const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
+                   const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3));
+                   const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8;
+                   const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18;
+                   const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15;
+                   const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23;
+                   const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4;
+                   const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3;
+                   const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_17*2.0;
+                   const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30;
+                   const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8;
+                   const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q];
+                   const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_40 = tmp_qloop_39*2.0;
+                   const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_42 = tmp_qloop_41*2.0;
+                   const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = rho_dof_0*(tmp_qloop_37 + tmp_qloop_44 - 3.0);
+                   const real_t tmp_qloop_46 = rho_dof_1*(tmp_qloop_37 - 1.0) + rho_dof_3*tmp_qloop_44 - rho_dof_4*tmp_qloop_44 + rho_dof_5*(-tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_45;
+                   const real_t tmp_qloop_47 = rho_dof_2*(tmp_qloop_44 - 1.0) + rho_dof_3*tmp_qloop_37 + rho_dof_4*(-tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_37 + tmp_qloop_45;
+                   const real_t tmp_qloop_49 = tmp_qloop_40 - _data_q_p_0[q];
+                   const real_t tmp_qloop_50 = tmp_qloop_42 - _data_q_p_1[q];
+                   const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
+                   const real_t tmp_qloop_52 = tmp_qloop_39*4.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52;
+                   const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
+                   const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
+                   const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
+                   const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3;
+                   const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
+                   const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
+                   const real_t abs_det_jac_blending = tmp_qloop_21;
+                   const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
+                   const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
+                   const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
+                   const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
+                   const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
+                   const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
+                   const real_t tmp_qloop_48 = tmp_qloop_46*(jac_affine_inv_0_0_BLUE*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE*jac_blending_inv_1_0) + tmp_qloop_47*(jac_affine_inv_1_0_BLUE*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE*jac_blending_inv_1_0);
+                   const real_t tmp_qloop_56 = tmp_qloop_48*tmp_qloop_55;
+                   const real_t tmp_qloop_60 = tmp_qloop_48*tmp_qloop_59;
+                   const real_t tmp_qloop_62 = tmp_qloop_48*tmp_qloop_61;
+                   const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22;
+                   const real_t tmp_qloop_57 = tmp_qloop_46*(jac_affine_inv_0_0_BLUE*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE*jac_blending_inv_1_1) + tmp_qloop_47*(jac_affine_inv_1_0_BLUE*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE*jac_blending_inv_1_1);
+                   const real_t tmp_qloop_58 = tmp_qloop_55*tmp_qloop_57;
+                   const real_t tmp_qloop_63 = tmp_qloop_57*tmp_qloop_59;
+                   const real_t tmp_qloop_64 = tmp_qloop_57*tmp_qloop_61;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28;
+                   const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34;
+                   const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35;
+                   const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34;
+                   const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33;
+                   const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_56;
+                   const real_t q_tmp_0_1 = tmp_qloop_49*tmp_qloop_56;
+                   const real_t q_tmp_0_2 = tmp_qloop_50*tmp_qloop_56;
+                   const real_t q_tmp_0_3 = tmp_qloop_38*tmp_qloop_56;
+                   const real_t q_tmp_0_4 = tmp_qloop_51*tmp_qloop_56;
+                   const real_t q_tmp_0_5 = tmp_qloop_53*tmp_qloop_56;
+                   const real_t q_tmp_0_6 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t q_tmp_0_7 = tmp_qloop_49*tmp_qloop_58;
+                   const real_t q_tmp_0_8 = tmp_qloop_50*tmp_qloop_58;
+                   const real_t q_tmp_0_9 = tmp_qloop_38*tmp_qloop_58;
+                   const real_t q_tmp_0_10 = tmp_qloop_51*tmp_qloop_58;
+                   const real_t q_tmp_0_11 = tmp_qloop_53*tmp_qloop_58;
+                   const real_t q_tmp_1_0 = tmp_qloop_43*tmp_qloop_60;
+                   const real_t q_tmp_1_1 = tmp_qloop_49*tmp_qloop_60;
+                   const real_t q_tmp_1_2 = tmp_qloop_50*tmp_qloop_60;
+                   const real_t q_tmp_1_3 = tmp_qloop_52*tmp_qloop_62;
+                   const real_t q_tmp_1_4 = tmp_qloop_51*tmp_qloop_60;
+                   const real_t q_tmp_1_5 = tmp_qloop_53*tmp_qloop_60;
+                   const real_t q_tmp_1_6 = tmp_qloop_43*tmp_qloop_63;
+                   const real_t q_tmp_1_7 = tmp_qloop_49*tmp_qloop_63;
+                   const real_t q_tmp_1_8 = tmp_qloop_50*tmp_qloop_63;
+                   const real_t q_tmp_1_9 = tmp_qloop_52*tmp_qloop_64;
+                   const real_t q_tmp_1_10 = tmp_qloop_51*tmp_qloop_63;
+                   const real_t q_tmp_1_11 = tmp_qloop_53*tmp_qloop_63;
+                   const real_t q_tmp_2_0 = tmp_qloop_43*tmp_qloop_62;
+                   const real_t q_tmp_2_1 = tmp_qloop_49*tmp_qloop_62;
+                   const real_t q_tmp_2_2 = tmp_qloop_50*tmp_qloop_62;
+                   const real_t q_tmp_2_3 = tmp_qloop_48*tmp_qloop_65;
+                   const real_t q_tmp_2_4 = tmp_qloop_51*tmp_qloop_62;
+                   const real_t q_tmp_2_5 = tmp_qloop_53*tmp_qloop_62;
+                   const real_t q_tmp_2_6 = tmp_qloop_43*tmp_qloop_64;
+                   const real_t q_tmp_2_7 = tmp_qloop_49*tmp_qloop_64;
+                   const real_t q_tmp_2_8 = tmp_qloop_50*tmp_qloop_64;
+                   const real_t q_tmp_2_9 = tmp_qloop_57*tmp_qloop_65;
+                   const real_t q_tmp_2_10 = tmp_qloop_51*tmp_qloop_64;
+                   const real_t q_tmp_2_11 = tmp_qloop_53*tmp_qloop_64;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
new file mode 100644
index 00000000..a5d573ed
--- /dev/null
+++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
@@ -0,0 +1,7929 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_edge_2, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t * RESTRICT  _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
+   
+       const real_t _data_q_p_0 [] = {0.25, 0.16666666666666666, 0.16666666666666666, 0.5, 0.16666666666666666};
+   
+       const real_t _data_q_p_1 [] = {0.25, 0.16666666666666666, 0.5, 0.16666666666666666, 0.16666666666666666};
+   
+       const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
+   
+       const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
+       const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
+       const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
+       const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
+       const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
+       const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
+       const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
+       const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
+       const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
+       const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
+       const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
+       const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
+       const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP;
+       const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP);
+       const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
+       const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
+       const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
+       const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP);
+       const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_1 = -rayVertex_0;
+       const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
+       const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
+       const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
+       const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
+       const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
+       const real_t tmp_qloop_8 = -rayVertex_1;
+       const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
+       const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = -rayVertex_2;
+       const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
+       const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
+       const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
+       const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
+       const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
+       const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
+       const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
+       const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
+       const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
+       const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+       const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
+       const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
+       const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
+       const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
+       const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
+       {
+          /* CellType.WHITE_UP */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21));
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22);
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28);
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66);
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43);
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32)));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76);
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89);
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76);
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76);
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97);
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99);
+                   const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74);
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76);
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113);
+                   const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114);
+                   const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116);
+                   const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119);
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120);
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111);
+                   const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123);
+                   const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123);
+                   const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104);
+                   const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106);
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108);
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116);
+                   const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109);
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111);
+                   const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111);
+                   const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38);
+                   const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44);
+                   const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50);
+                   const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52);
+                   const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1);
+                   const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2);
+                   const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52);
+                   const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1);
+                   const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1));
+                   const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64);
+                   const __m256d abs_det_jac_blending = tmp_qloop_64;
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158);
+                   const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59));
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1)));
+                   const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136);
+                   const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142);
+                   const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144);
+                   const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145);
+                   const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147);
+                   const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135);
+                   const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157);
+                   const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP)))));
+                   const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138);
+                   const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142);
+                   const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144);
+                   const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150);
+                   const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147);
+                   const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161);
+                   const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP)))));
+                   const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140);
+                   const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142);
+                   const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144);
+                   const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154);
+                   const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147);
+                   const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84);
+                   const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84);
+                   const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143);
+                   const __m256d q_tmp_1_4 = tmp_qloop_146;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149);
+                   const __m256d q_tmp_1_14 = tmp_qloop_151;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153);
+                   const __m256d q_tmp_1_24 = tmp_qloop_155;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145);
+                   const __m256d q_tmp_2_5 = tmp_qloop_146;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150);
+                   const __m256d q_tmp_2_15 = tmp_qloop_151;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154);
+                   const __m256d q_tmp_2_25 = tmp_qloop_155;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160);
+                   const __m256d q_tmp_3_6 = tmp_qloop_146;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162);
+                   const __m256d q_tmp_3_16 = tmp_qloop_151;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163);
+                   const __m256d q_tmp_3_26 = tmp_qloop_155;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
+                   const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
+                   const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
+                   const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
+                   const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
+                   const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q];
+                   const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8;
+                   const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7);
+                   const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
+                   const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
+                   const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
+                   const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
+                   const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
+                   const real_t tmp_qloop_47 = -tmp_qloop_28;
+                   const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
+                   const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
+                   const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
+                   const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
+                   const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
+                   const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
+                   const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
+                   const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
+                   const real_t tmp_qloop_70 = -tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
+                   const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
+                   const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
+                   const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66;
+                   const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_25*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43;
+                   const real_t tmp_qloop_82 = tmp_qloop_22*2.0;
+                   const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82;
+                   const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78;
+                   const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22;
+                   const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43;
+                   const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69;
+                   const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43;
+                   const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76;
+                   const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28;
+                   const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89;
+                   const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82;
+                   const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89;
+                   const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68;
+                   const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76;
+                   const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69;
+                   const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76;
+                   const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79;
+                   const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97;
+                   const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99;
+                   const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76;
+                   const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_104 = tmp_qloop_103*2.0;
+                   const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_106 = tmp_qloop_105*2.0;
+                   const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_108 = tmp_qloop_107*2.0;
+                   const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q];
+                   const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q];
+                   const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q];
+                   const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113;
+                   const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116;
+                   const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109;
+                   const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0);
+                   const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119;
+                   const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120;
+                   const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0;
+                   const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111;
+                   const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123;
+                   const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123;
+                   const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q];
+                   const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q];
+                   const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q];
+                   const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116;
+                   const real_t tmp_qloop_131 = tmp_qloop_105*4.0;
+                   const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
+                   const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
+                   const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
+                   const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
+                   const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
+                   const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
+                   const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
+                   const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52;
+                   const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55;
+                   const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1;
+                   const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2;
+                   const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52;
+                   const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44;
+                   const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1;
+                   const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2;
+                   const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2;
+                   const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
+                   const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
+                   const real_t abs_det_jac_blending = tmp_qloop_64;
+                   const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                   const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                   const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                   const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                   const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                   const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
+                   const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
+                   const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
+                   const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
+                   const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0);
+                   const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0);
+                   const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_0);
+                   const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136;
+                   const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142;
+                   const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144;
+                   const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145;
+                   const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147;
+                   const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135;
+                   const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157;
+                   const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0);
+                   const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_1);
+                   const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138;
+                   const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142;
+                   const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144;
+                   const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150;
+                   const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147;
+                   const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161;
+                   const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0);
+                   const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_2);
+                   const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140;
+                   const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142;
+                   const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144;
+                   const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154;
+                   const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147;
+                   const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67;
+                   const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67;
+                   const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78;
+                   const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81;
+                   const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84;
+                   const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85;
+                   const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84;
+                   const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86;
+                   const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88;
+                   const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95;
+                   const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95;
+                   const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93;
+                   const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92;
+                   const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96;
+                   const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0;
+                   const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97;
+                   const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0;
+                   const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98;
+                   const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99;
+                   const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0;
+                   const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0;
+                   const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137;
+                   const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137;
+                   const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137;
+                   const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137;
+                   const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137;
+                   const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137;
+                   const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137;
+                   const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137;
+                   const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137;
+                   const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139;
+                   const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139;
+                   const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139;
+                   const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139;
+                   const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139;
+                   const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139;
+                   const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139;
+                   const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139;
+                   const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139;
+                   const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139;
+                   const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141;
+                   const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141;
+                   const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141;
+                   const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141;
+                   const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141;
+                   const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141;
+                   const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141;
+                   const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141;
+                   const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141;
+                   const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141;
+                   const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143;
+                   const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143;
+                   const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143;
+                   const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143;
+                   const real_t q_tmp_1_4 = tmp_qloop_146;
+                   const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145;
+                   const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148;
+                   const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143;
+                   const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143;
+                   const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143;
+                   const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149;
+                   const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149;
+                   const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149;
+                   const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149;
+                   const real_t q_tmp_1_14 = tmp_qloop_151;
+                   const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150;
+                   const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152;
+                   const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149;
+                   const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149;
+                   const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149;
+                   const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153;
+                   const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153;
+                   const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153;
+                   const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153;
+                   const real_t q_tmp_1_24 = tmp_qloop_155;
+                   const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154;
+                   const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156;
+                   const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153;
+                   const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153;
+                   const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153;
+                   const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148;
+                   const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148;
+                   const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148;
+                   const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148;
+                   const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145;
+                   const real_t q_tmp_2_5 = tmp_qloop_146;
+                   const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158;
+                   const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148;
+                   const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148;
+                   const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148;
+                   const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152;
+                   const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152;
+                   const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152;
+                   const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152;
+                   const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150;
+                   const real_t q_tmp_2_15 = tmp_qloop_151;
+                   const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159;
+                   const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152;
+                   const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152;
+                   const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152;
+                   const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156;
+                   const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156;
+                   const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156;
+                   const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156;
+                   const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154;
+                   const real_t q_tmp_2_25 = tmp_qloop_155;
+                   const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159;
+                   const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156;
+                   const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156;
+                   const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156;
+                   const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145;
+                   const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145;
+                   const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145;
+                   const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145;
+                   const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160;
+                   const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160;
+                   const real_t q_tmp_3_6 = tmp_qloop_146;
+                   const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145;
+                   const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145;
+                   const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145;
+                   const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150;
+                   const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150;
+                   const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150;
+                   const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150;
+                   const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162;
+                   const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162;
+                   const real_t q_tmp_3_16 = tmp_qloop_151;
+                   const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150;
+                   const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150;
+                   const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150;
+                   const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154;
+                   const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154;
+                   const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154;
+                   const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154;
+                   const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163;
+                   const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163;
+                   const real_t q_tmp_3_26 = tmp_qloop_155;
+                   const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154;
+                   const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154;
+                   const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // h = reciprocal of micro_edges_per_macro_edge_float; in the comments below vN stands for macro_vertex_coord_id_N
+       const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // h*(v2 - v0), component 0
+       const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // v0 + h*(v1 - v0), component 0
+       const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; // v0 + h*(v1 - v0) + h*(v2 - v0), component 0
+       const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // h*(v2 - v0), component 1
+       const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // v0 + h*(v1 - v0), component 1
+       const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); // h*(v2 - v0), component 2
+       const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); // v0 + h*(v1 - v0), component 2
+       const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); // h*(v3 - v0), component 0
+       const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); // h*(v3 - v0), component 1
+       const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); // h*(v3 - v0), component 2
+       const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; // point 0 = v0 + h*(v1 - v0) + h*(v2 - v0); p_affine_const_k_c is component c of point k
+       const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
+       const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; // point 1 = v0 + h*(v1 - v0) + h*(v3 - v0)
+       const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; // point 2 = v0 + h*(v2 - v0) + h*(v3 - v0)
+       const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
+       const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
+       const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; // point 3 = v0 + h*(v1 - v0) + h*(v2 - v0) + h*(v3 - v0)
+       const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
+       const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; // jac_affine_i_j = component i of (point j+1 - point 0)
+       const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
+       const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
+       const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; // product shared by the determinant and jac_affine_inv_0_2
+       const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; // product shared by the determinant and jac_affine_inv_0_2
+       const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; // product shared by the determinant and jac_affine_inv_0_0
+       const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; // product shared by the determinant and jac_affine_inv_0_0
+       const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; // product shared by the determinant and jac_affine_inv_0_1
+       const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; // determinant of the 3x3 matrix jac_affine_i_j
+       const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); // 1/determinant; NOTE(review): no zero-determinant check is visible here - confirm callers never pass a degenerate cell
+       const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); // jac_affine_inv_i_j: entries of the inverse matrix, each a 2x2 cofactor expression scaled by 1/determinant
+       const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
+       const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
+       const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); // absolute value of the determinant; NOTE(review): abs is unqualified - confirm a floating-point overload (not ::abs(int)) is selected for real_t
+       {
+          /* CellType.WHITE_DOWN */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21));
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22);
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28);
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66);
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43);
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32)));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76);
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89);
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76);
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76);
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97);
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99);
+                   const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74);
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76);
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113);
+                   const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114);
+                   const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116);
+                   const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119);
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120);
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111);
+                   const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123);
+                   const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123);
+                   const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104);
+                   const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106);
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108);
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116);
+                   const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109);
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111);
+                   const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111);
+                   const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38);
+                   const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44);
+                   const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50);
+                   const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52);
+                   const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1);
+                   const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2);
+                   const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52);
+                   const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1);
+                   const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1));
+                   const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64);
+                   const __m256d abs_det_jac_blending = tmp_qloop_64;
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158);
+                   const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59));
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1)));
+                   const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136);
+                   const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142);
+                   const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144);
+                   const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145);
+                   const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147);
+                   const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135);
+                   const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157);
+                   const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN)))));
+                   const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138);
+                   const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142);
+                   const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144);
+                   const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150);
+                   const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147);
+                   const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161);
+                   const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN)))));
+                   const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140);
+                   const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142);
+                   const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144);
+                   const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154);
+                   const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147);
+                   const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84);
+                   const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84);
+                   const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143);
+                   const __m256d q_tmp_1_4 = tmp_qloop_146;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149);
+                   const __m256d q_tmp_1_14 = tmp_qloop_151;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153);
+                   const __m256d q_tmp_1_24 = tmp_qloop_155;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145);
+                   const __m256d q_tmp_2_5 = tmp_qloop_146;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150);
+                   const __m256d q_tmp_2_15 = tmp_qloop_151;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154);
+                   const __m256d q_tmp_2_25 = tmp_qloop_155;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160);
+                   const __m256d q_tmp_3_6 = tmp_qloop_146;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162);
+                   const __m256d q_tmp_3_16 = tmp_qloop_151;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163);
+                   const __m256d q_tmp_3_26 = tmp_qloop_155;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
+                   const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
+                   const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
+                   const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
+                   const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
+                   const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q];
+                   const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8;
+                   const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7);
+                   const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
+                   const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
+                   const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
+                   const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
+                   const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
+                   const real_t tmp_qloop_47 = -tmp_qloop_28;
+                   const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
+                   const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
+                   const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
+                   const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
+                   const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
+                   const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
+                   const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
+                   const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
+                   const real_t tmp_qloop_70 = -tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
+                   const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
+                   const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
+                   const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66;
+                   const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_25*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43;
+                   const real_t tmp_qloop_82 = tmp_qloop_22*2.0;
+                   const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82;
+                   const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78;
+                   const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22;
+                   const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43;
+                   const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69;
+                   const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43;
+                   const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76;
+                   const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28;
+                   const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89;
+                   const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82;
+                   const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89;
+                   const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68;
+                   const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76;
+                   const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69;
+                   const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76;
+                   const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79;
+                   const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97;
+                   const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99;
+                   const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76;
+                   const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_104 = tmp_qloop_103*2.0;
+                   const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_106 = tmp_qloop_105*2.0;
+                   const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_108 = tmp_qloop_107*2.0;
+                   const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q];
+                   const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q];
+                   const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q];
+                   const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113;
+                   const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116;
+                   const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109;
+                   const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0);
+                   const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119;
+                   const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120;
+                   const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0;
+                   const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111;
+                   const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123;
+                   const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123;
+                   const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q];
+                   const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q];
+                   const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q];
+                   const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116;
+                   const real_t tmp_qloop_131 = tmp_qloop_105*4.0;
+                   const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
+                   const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
+                   const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
+                   const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
+                   const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
+                   const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
+                   const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
+                   const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52;
+                   const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55;
+                   const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1;
+                   const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2;
+                   const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52;
+                   const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44;
+                   const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1;
+                   const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2;
+                   const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2;
+                   const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
+                   const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
+                   const real_t abs_det_jac_blending = tmp_qloop_64;
+                   const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                   const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                   const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                   const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                   const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                   const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
+                   const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
+                   const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
+                   const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
+                   const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0);
+                   const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0);
+                   const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_0);
+                   const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136;
+                   const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142;
+                   const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144;
+                   const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145;
+                   const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147;
+                   const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135;
+                   const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157;
+                   const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0);
+                   const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_1);
+                   const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138;
+                   const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142;
+                   const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144;
+                   const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150;
+                   const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147;
+                   const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161;
+                   const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0);
+                   const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_2);
+                   const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140;
+                   const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142;
+                   const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144;
+                   const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154;
+                   const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147;
+                   const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67;
+                   const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67;
+                   const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78;
+                   const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81;
+                   const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84;
+                   const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85;
+                   const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84;
+                   const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86;
+                   const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88;
+                   const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95;
+                   const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95;
+                   const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93;
+                   const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92;
+                   const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96;
+                   const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0;
+                   const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97;
+                   const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0;
+                   const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98;
+                   const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99;
+                   const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0;
+                   const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0;
+                   const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137;
+                   const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137;
+                   const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137;
+                   const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137;
+                   const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137;
+                   const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137;
+                   const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137;
+                   const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137;
+                   const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137;
+                   const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139;
+                   const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139;
+                   const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139;
+                   const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139;
+                   const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139;
+                   const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139;
+                   const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139;
+                   const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139;
+                   const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139;
+                   const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139;
+                   const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141;
+                   const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141;
+                   const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141;
+                   const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141;
+                   const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141;
+                   const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141;
+                   const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141;
+                   const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141;
+                   const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141;
+                   const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141;
+                   const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143;
+                   const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143;
+                   const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143;
+                   const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143;
+                   const real_t q_tmp_1_4 = tmp_qloop_146;
+                   const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145;
+                   const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148;
+                   const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143;
+                   const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143;
+                   const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143;
+                   const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149;
+                   const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149;
+                   const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149;
+                   const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149;
+                   const real_t q_tmp_1_14 = tmp_qloop_151;
+                   const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150;
+                   const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152;
+                   const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149;
+                   const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149;
+                   const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149;
+                   const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153;
+                   const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153;
+                   const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153;
+                   const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153;
+                   const real_t q_tmp_1_24 = tmp_qloop_155;
+                   const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154;
+                   const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156;
+                   const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153;
+                   const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153;
+                   const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153;
+                   const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148;
+                   const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148;
+                   const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148;
+                   const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148;
+                   const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145;
+                   const real_t q_tmp_2_5 = tmp_qloop_146;
+                   const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158;
+                   const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148;
+                   const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148;
+                   const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148;
+                   const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152;
+                   const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152;
+                   const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152;
+                   const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152;
+                   const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150;
+                   const real_t q_tmp_2_15 = tmp_qloop_151;
+                   const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159;
+                   const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152;
+                   const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152;
+                   const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152;
+                   const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156;
+                   const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156;
+                   const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156;
+                   const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156;
+                   const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154;
+                   const real_t q_tmp_2_25 = tmp_qloop_155;
+                   const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159;
+                   const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156;
+                   const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156;
+                   const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156;
+                   const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145;
+                   const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145;
+                   const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145;
+                   const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145;
+                   const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160;
+                   const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160;
+                   const real_t q_tmp_3_6 = tmp_qloop_146;
+                   const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145;
+                   const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145;
+                   const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145;
+                   const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150;
+                   const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150;
+                   const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150;
+                   const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150;
+                   const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162;
+                   const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162;
+                   const real_t q_tmp_3_16 = tmp_qloop_151;
+                   const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150;
+                   const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150;
+                   const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150;
+                   const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154;
+                   const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154;
+                   const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154;
+                   const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154;
+                   const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163;
+                   const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163;
+                   const real_t q_tmp_3_26 = tmp_qloop_155;
+                   const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154;
+                   const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154;
+                   const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
+       const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
+       const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
+       const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
+       const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
+       const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
+       const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
+       const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
+       const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
+       const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
+       const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
+       const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
+       const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
+       const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP;
+       const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP);
+       const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
+       const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
+       const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
+       const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP);
+       {
+          /* CellType.BLUE_UP */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21));
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22);
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28);
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66);
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43);
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32)));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76);
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89);
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76);
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76);
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97);
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99);
+                   const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74);
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76);
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113);
+                   const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114);
+                   const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116);
+                   const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119);
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120);
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111);
+                   const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123);
+                   const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123);
+                   const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104);
+                   const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106);
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108);
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116);
+                   const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109);
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111);
+                   const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111);
+                   const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38);
+                   const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44);
+                   const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50);
+                   const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52);
+                   const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1);
+                   const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2);
+                   const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52);
+                   const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1);
+                   const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1));
+                   const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64);
+                   const __m256d abs_det_jac_blending = tmp_qloop_64;
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158);
+                   const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59));
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1)));
+                   const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136);
+                   const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142);
+                   const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144);
+                   const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145);
+                   const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147);
+                   const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135);
+                   const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157);
+                   const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP)))));
+                   const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138);
+                   const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142);
+                   const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144);
+                   const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150);
+                   const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147);
+                   const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161);
+                   const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP)))));
+                   const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140);
+                   const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142);
+                   const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144);
+                   const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154);
+                   const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147);
+                   const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84);
+                   const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84);
+                   const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143);
+                   const __m256d q_tmp_1_4 = tmp_qloop_146;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149);
+                   const __m256d q_tmp_1_14 = tmp_qloop_151;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153);
+                   const __m256d q_tmp_1_24 = tmp_qloop_155;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145);
+                   const __m256d q_tmp_2_5 = tmp_qloop_146;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150);
+                   const __m256d q_tmp_2_15 = tmp_qloop_151;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154);
+                   const __m256d q_tmp_2_25 = tmp_qloop_155;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160);
+                   const __m256d q_tmp_3_6 = tmp_qloop_146;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162);
+                   const __m256d q_tmp_3_16 = tmp_qloop_151;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163);
+                   const __m256d q_tmp_3_26 = tmp_qloop_155;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
+                   const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
+                   const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
+                   const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
+                   const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
+                   const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q];
+                   const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8;
+                   const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7);
+                   const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
+                   const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
+                   const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
+                   const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
+                   const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
+                   const real_t tmp_qloop_47 = -tmp_qloop_28;
+                   const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
+                   const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
+                   const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
+                   const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
+                   const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
+                   const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
+                   const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
+                   const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
+                   const real_t tmp_qloop_70 = -tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
+                   const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
+                   const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
+                   const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66;
+                   const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_25*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43;
+                   const real_t tmp_qloop_82 = tmp_qloop_22*2.0;
+                   const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82;
+                   const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78;
+                   const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22;
+                   const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43;
+                   const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69;
+                   const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43;
+                   const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76;
+                   const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28;
+                   const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89;
+                   const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82;
+                   const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89;
+                   const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68;
+                   const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76;
+                   const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69;
+                   const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76;
+                   const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79;
+                   const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97;
+                   const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99;
+                   const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76;
+                   const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_104 = tmp_qloop_103*2.0;
+                   const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_106 = tmp_qloop_105*2.0;
+                   const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_108 = tmp_qloop_107*2.0;
+                   const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q];
+                   const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q];
+                   const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q];
+                   const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113;
+                   const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116;
+                   const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109;
+                   const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0);
+                   const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119;
+                   const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120;
+                   const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0;
+                   const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111;
+                   const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123;
+                   const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123;
+                   const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q];
+                   const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q];
+                   const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q];
+                   const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116;
+                   const real_t tmp_qloop_131 = tmp_qloop_105*4.0;
+                   const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
+                   const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
+                   const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
+                   const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
+                   const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
+                   const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
+                   const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
+                   const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52;
+                   const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55;
+                   const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1;
+                   const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2;
+                   const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52;
+                   const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44;
+                   const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1;
+                   const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2;
+                   const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2;
+                   const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
+                   const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
+                   const real_t abs_det_jac_blending = tmp_qloop_64;
+                   const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                   const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                   const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                   const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                   const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                   const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
+                   const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
+                   const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
+                   const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
+                   const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0);
+                   const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0);
+                   const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_0);
+                   const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136;
+                   const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142;
+                   const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144;
+                   const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145;
+                   const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147;
+                   const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135;
+                   const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157;
+                   const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0);
+                   const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_1);
+                   const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138;
+                   const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142;
+                   const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144;
+                   const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150;
+                   const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147;
+                   const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161;
+                   const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0);
+                   const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_2);
+                   const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140;
+                   const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142;
+                   const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144;
+                   const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154;
+                   const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147;
+                   const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67;
+                   const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67;
+                   const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78;
+                   const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81;
+                   const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84;
+                   const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85;
+                   const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84;
+                   const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86;
+                   const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88;
+                   const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95;
+                   const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95;
+                   const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93;
+                   const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92;
+                   const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96;
+                   const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0;
+                   const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97;
+                   const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0;
+                   const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98;
+                   const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99;
+                   const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0;
+                   const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0;
+                   const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137;
+                   const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137;
+                   const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137;
+                   const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137;
+                   const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137;
+                   const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137;
+                   const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137;
+                   const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137;
+                   const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137;
+                   const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139;
+                   const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139;
+                   const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139;
+                   const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139;
+                   const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139;
+                   const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139;
+                   const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139;
+                   const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139;
+                   const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139;
+                   const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139;
+                   const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141;
+                   const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141;
+                   const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141;
+                   const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141;
+                   const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141;
+                   const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141;
+                   const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141;
+                   const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141;
+                   const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141;
+                   const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141;
+                   const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143;
+                   const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143;
+                   const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143;
+                   const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143;
+                   const real_t q_tmp_1_4 = tmp_qloop_146;
+                   const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145;
+                   const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148;
+                   const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143;
+                   const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143;
+                   const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143;
+                   const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149;
+                   const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149;
+                   const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149;
+                   const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149;
+                   const real_t q_tmp_1_14 = tmp_qloop_151;
+                   const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150;
+                   const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152;
+                   const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149;
+                   const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149;
+                   const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149;
+                   const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153;
+                   const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153;
+                   const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153;
+                   const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153;
+                   const real_t q_tmp_1_24 = tmp_qloop_155;
+                   const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154;
+                   const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156;
+                   const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153;
+                   const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153;
+                   const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153;
+                   const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148;
+                   const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148;
+                   const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148;
+                   const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148;
+                   const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145;
+                   const real_t q_tmp_2_5 = tmp_qloop_146;
+                   const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158;
+                   const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148;
+                   const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148;
+                   const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148;
+                   const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152;
+                   const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152;
+                   const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152;
+                   const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152;
+                   const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150;
+                   const real_t q_tmp_2_15 = tmp_qloop_151;
+                   const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159;
+                   const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152;
+                   const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152;
+                   const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152;
+                   const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156;
+                   const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156;
+                   const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156;
+                   const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156;
+                   const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154;
+                   const real_t q_tmp_2_25 = tmp_qloop_155;
+                   const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159;
+                   const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156;
+                   const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156;
+                   const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156;
+                   const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145;
+                   const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145;
+                   const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145;
+                   const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145;
+                   const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160;
+                   const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160;
+                   const real_t q_tmp_3_6 = tmp_qloop_146;
+                   const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145;
+                   const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145;
+                   const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145;
+                   const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150;
+                   const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150;
+                   const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150;
+                   const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150;
+                   const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162;
+                   const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162;
+                   const real_t q_tmp_3_16 = tmp_qloop_151;
+                   const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150;
+                   const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150;
+                   const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150;
+                   const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154;
+                   const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154;
+                   const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154;
+                   const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154;
+                   const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163;
+                   const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163;
+                   const real_t q_tmp_3_26 = tmp_qloop_155;
+                   const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154;
+                   const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154;
+                   const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
+       const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
+       const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
+       const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
+       const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
+       const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
+       const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
+       const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN;
+       const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN);
+       const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
+       const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
+       const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
+       const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN);
+       {
+          /* CellType.BLUE_DOWN */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21));
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22);
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28);
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66);
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43);
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32)));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76);
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89);
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76);
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76);
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97);
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99);
+                   const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74);
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76);
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113);
+                   const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114);
+                   const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116);
+                   const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119);
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120);
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111);
+                   const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123);
+                   const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123);
+                   const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104);
+                   const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106);
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108);
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116);
+                   const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109);
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111);
+                   const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111);
+                   const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38);
+                   const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44);
+                   const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50);
+                   const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52);
+                   const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1);
+                   const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2);
+                   const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52);
+                   const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1);
+                   const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1));
+                   const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64);
+                   const __m256d abs_det_jac_blending = tmp_qloop_64;
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158);
+                   const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59));
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1)));
+                   const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136);
+                   const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142);
+                   const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144);
+                   const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145);
+                   const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147);
+                   const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135);
+                   const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157);
+                   const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN)))));
+                   const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138);
+                   const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142);
+                   const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144);
+                   const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150);
+                   const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147);
+                   const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161);
+                   const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN)))));
+                   const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140);
+                   const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142);
+                   const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144);
+                   const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154);
+                   const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147);
+                   const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84);
+                   const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84);
+                   const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143);
+                   const __m256d q_tmp_1_4 = tmp_qloop_146;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149);
+                   const __m256d q_tmp_1_14 = tmp_qloop_151;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153);
+                   const __m256d q_tmp_1_24 = tmp_qloop_155;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145);
+                   const __m256d q_tmp_2_5 = tmp_qloop_146;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150);
+                   const __m256d q_tmp_2_15 = tmp_qloop_151;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154);
+                   const __m256d q_tmp_2_25 = tmp_qloop_155;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160);
+                   const __m256d q_tmp_3_6 = tmp_qloop_146;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162);
+                   const __m256d q_tmp_3_16 = tmp_qloop_151;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163);
+                   const __m256d q_tmp_3_26 = tmp_qloop_155;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
+                   const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
+                   const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
+                   const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
+                   const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
+                   const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q];
+                   const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8;
+                   const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7);
+                   const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
+                   const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
+                   const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
+                   const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
+                   const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
+                   const real_t tmp_qloop_47 = -tmp_qloop_28;
+                   const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
+                   const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
+                   const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
+                   const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
+                   const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
+                   const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
+                   const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
+                   const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
+                   const real_t tmp_qloop_70 = -tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
+                   const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
+                   const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
+                   const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66;
+                   const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_25*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43;
+                   const real_t tmp_qloop_82 = tmp_qloop_22*2.0;
+                   const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82;
+                   const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78;
+                   const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22;
+                   const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43;
+                   const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69;
+                   const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43;
+                   const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76;
+                   const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28;
+                   const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89;
+                   const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82;
+                   const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89;
+                   const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68;
+                   const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76;
+                   const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69;
+                   const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76;
+                   const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79;
+                   const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97;
+                   const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99;
+                   const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76;
+                   const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_104 = tmp_qloop_103*2.0;
+                   const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_106 = tmp_qloop_105*2.0;
+                   const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_108 = tmp_qloop_107*2.0;
+                   const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q];
+                   const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q];
+                   const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q];
+                   const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113;
+                   const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116;
+                   const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109;
+                   const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0);
+                   const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119;
+                   const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120;
+                   const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0;
+                   const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111;
+                   const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123;
+                   const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123;
+                   const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q];
+                   const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q];
+                   const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q];
+                   const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116;
+                   const real_t tmp_qloop_131 = tmp_qloop_105*4.0;
+                   const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
+                   const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
+                   const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
+                   const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
+                   const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
+                   const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
+                   const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
+                   const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52;
+                   const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55;
+                   const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1;
+                   const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2;
+                   const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52;
+                   const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44;
+                   const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1;
+                   const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2;
+                   const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2;
+                   const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
+                   const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
+                   const real_t abs_det_jac_blending = tmp_qloop_64;
+                   const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                   const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                   const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                   const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                   const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                   const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
+                   const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
+                   const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
+                   const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
+                   const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0);
+                   const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0);
+                   const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_0);
+                   const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136;
+                   const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142;
+                   const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144;
+                   const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145;
+                   const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147;
+                   const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135;
+                   const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157;
+                   const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0);
+                   const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_1);
+                   const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138;
+                   const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142;
+                   const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144;
+                   const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150;
+                   const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147;
+                   const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161;
+                   const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0);
+                   const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_2);
+                   const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140;
+                   const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142;
+                   const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144;
+                   const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154;
+                   const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147;
+                   const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67;
+                   const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67;
+                   const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78;
+                   const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81;
+                   const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84;
+                   const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85;
+                   const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84;
+                   const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86;
+                   const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88;
+                   const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95;
+                   const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95;
+                   const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93;
+                   const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92;
+                   const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96;
+                   const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0;
+                   const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97;
+                   const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0;
+                   const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98;
+                   const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99;
+                   const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0;
+                   const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0;
+                   const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137;
+                   const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137;
+                   const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137;
+                   const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137;
+                   const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137;
+                   const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137;
+                   const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137;
+                   const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137;
+                   const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137;
+                   const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139;
+                   const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139;
+                   const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139;
+                   const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139;
+                   const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139;
+                   const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139;
+                   const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139;
+                   const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139;
+                   const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139;
+                   const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139;
+                   const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141;
+                   const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141;
+                   const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141;
+                   const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141;
+                   const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141;
+                   const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141;
+                   const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141;
+                   const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141;
+                   const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141;
+                   const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141;
+                   const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143;
+                   const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143;
+                   const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143;
+                   const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143;
+                   const real_t q_tmp_1_4 = tmp_qloop_146;
+                   const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145;
+                   const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148;
+                   const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143;
+                   const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143;
+                   const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143;
+                   const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149;
+                   const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149;
+                   const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149;
+                   const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149;
+                   const real_t q_tmp_1_14 = tmp_qloop_151;
+                   const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150;
+                   const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152;
+                   const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149;
+                   const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149;
+                   const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149;
+                   const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153;
+                   const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153;
+                   const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153;
+                   const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153;
+                   const real_t q_tmp_1_24 = tmp_qloop_155;
+                   const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154;
+                   const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156;
+                   const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153;
+                   const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153;
+                   const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153;
+                   const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148;
+                   const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148;
+                   const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148;
+                   const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148;
+                   const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145;
+                   const real_t q_tmp_2_5 = tmp_qloop_146;
+                   const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158;
+                   const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148;
+                   const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148;
+                   const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148;
+                   const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152;
+                   const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152;
+                   const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152;
+                   const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152;
+                   const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150;
+                   const real_t q_tmp_2_15 = tmp_qloop_151;
+                   const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159;
+                   const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152;
+                   const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152;
+                   const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152;
+                   const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156;
+                   const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156;
+                   const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156;
+                   const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156;
+                   const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154;
+                   const real_t q_tmp_2_25 = tmp_qloop_155;
+                   const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159;
+                   const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156;
+                   const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156;
+                   const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156;
+                   const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145;
+                   const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145;
+                   const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145;
+                   const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145;
+                   const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160;
+                   const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160;
+                   const real_t q_tmp_3_6 = tmp_qloop_146;
+                   const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145;
+                   const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145;
+                   const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145;
+                   const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150;
+                   const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150;
+                   const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150;
+                   const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150;
+                   const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162;
+                   const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162;
+                   const real_t q_tmp_3_16 = tmp_qloop_151;
+                   const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150;
+                   const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150;
+                   const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150;
+                   const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154;
+                   const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154;
+                   const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154;
+                   const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154;
+                   const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163;
+                   const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163;
+                   const real_t q_tmp_3_26 = tmp_qloop_155;
+                   const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154;
+                   const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154;
+                   const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // GREEN_UP geometry constants, evaluated before the GREEN_UP loop nest; this factor is the reciprocal of micro_edges_per_macro_edge_float
+       const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // tmp_coords_jac_1..3: macro_vertex_coord_id_0 + factor*(macro_vertex_coord_id_1 - macro_vertex_coord_id_0), components 0..2
+       const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); // tmp_coords_jac_4..6: factor*(macro_vertex_coord_id_3 - macro_vertex_coord_id_0), components 0..2
+       const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; // p_affine_const_0_*: macro_vertex_coord_id_0 shifted by the scaled (id_1 - id_0) offset
+       const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
+       const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
+       const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // p_affine_const_1_*: macro_vertex_coord_id_0 shifted by the scaled (id_2 - id_0) offset
+       const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; // p_affine_const_2_*: macro_vertex_coord_id_0 shifted by the scaled (id_3 - id_0) offset
+       const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
+       const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; // p_affine_const_3_*: macro_vertex_coord_id_0 shifted by both the scaled (id_1 - id_0) and (id_3 - id_0) offsets
+       const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
+       const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; // jac_affine_r_c = p_affine_const_(c+1)_r - p_affine_const_0_r: column c is the offset of point c+1 from point 0
+       const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
+       const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
+       const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
+       const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
+       const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; // tmp_coords_jac_7..11: pairwise products reused by the determinant and the inverse entries below
+       const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
+       const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
+       const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
+       const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
+       const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
+       const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; // determinant of the 3x3 jac_affine matrix, written out term by term
+       const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); // reciprocal of the determinant; there is no guard here against a zero determinant
+       const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); // jac_affine_inv_*: adjugate entries of jac_affine scaled by 1/det, i.e. the matrix inverse
+       const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
+       const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
+       const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); // absolute value of the determinant. NOTE(review): unqualified abs on a real_t — confirm a floating-point overload is in scope (an int-only abs would truncate)
+       {
+          /* CellType.GREEN_UP */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21));
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22);
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28);
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66);
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43);
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32)));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76);
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89);
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76);
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76);
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97);
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99);
+                   const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74);
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76);
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113);
+                   const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114);
+                   const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116);
+                   const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119);
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120);
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111);
+                   const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123);
+                   const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123);
+                   const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104);
+                   const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106);
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108);
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116);
+                   const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109);
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111);
+                   const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111);
+                   const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38);
+                   const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44);
+                   const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50);
+                   const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52);
+                   const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1);
+                   const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2);
+                   const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52);
+                   const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1);
+                   const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1));
+                   const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64);
+                   const __m256d abs_det_jac_blending = tmp_qloop_64;
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158);
+                   const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59));
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1)));
+                   const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136);
+                   const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142);
+                   const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144);
+                   const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145);
+                   const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147);
+                   const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135);
+                   const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157);
+                   const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP)))));
+                   const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138);
+                   const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142);
+                   const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144);
+                   const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150);
+                   const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147);
+                   const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161);
+                   const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP)))));
+                   const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140);
+                   const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142);
+                   const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144);
+                   const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154);
+                   const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147);
+                   const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84);
+                   const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84);
+                   const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143);
+                   const __m256d q_tmp_1_4 = tmp_qloop_146;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149);
+                   const __m256d q_tmp_1_14 = tmp_qloop_151;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153);
+                   const __m256d q_tmp_1_24 = tmp_qloop_155;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145);
+                   const __m256d q_tmp_2_5 = tmp_qloop_146;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150);
+                   const __m256d q_tmp_2_15 = tmp_qloop_151;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154);
+                   const __m256d q_tmp_2_25 = tmp_qloop_155;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160);
+                   const __m256d q_tmp_3_6 = tmp_qloop_146;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162);
+                   const __m256d q_tmp_3_16 = tmp_qloop_151;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163);
+                   const __m256d q_tmp_3_26 = tmp_qloop_155;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
+                   const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
+                   const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
+                   const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
+                   const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
+                   const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q];
+                   const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8;
+                   const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7);
+                   const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
+                   const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
+                   const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
+                   const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
+                   const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
+                   const real_t tmp_qloop_47 = -tmp_qloop_28;
+                   const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
+                   const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
+                   const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
+                   const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
+                   const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
+                   const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
+                   const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
+                   const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
+                   const real_t tmp_qloop_70 = -tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
+                   const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
+                   const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
+                   const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66;
+                   const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_25*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43;
+                   const real_t tmp_qloop_82 = tmp_qloop_22*2.0;
+                   const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82;
+                   const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78;
+                   const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22;
+                   const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43;
+                   const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69;
+                   const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43;
+                   const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76;
+                   const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28;
+                   const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89;
+                   const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82;
+                   const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89;
+                   const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68;
+                   const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76;
+                   const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69;
+                   const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76;
+                   const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79;
+                   const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97;
+                   const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99;
+                   const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76;
+                   const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_104 = tmp_qloop_103*2.0;
+                   const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_106 = tmp_qloop_105*2.0;
+                   const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_108 = tmp_qloop_107*2.0;
+                   const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q];
+                   const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q];
+                   const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q];
+                   const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113;
+                   const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116;
+                   const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109;
+                   const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0);
+                   const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119;
+                   const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120;
+                   const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0;
+                   const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111;
+                   const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123;
+                   const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123;
+                   const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q];
+                   const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q];
+                   const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q];
+                   const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116;
+                   const real_t tmp_qloop_131 = tmp_qloop_105*4.0;
+                   const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
+                   const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
+                   const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
+                   const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
+                   const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
+                   const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
+                   const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
+                   const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52;
+                   const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55;
+                   const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1;
+                   const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2;
+                   const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52;
+                   const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44;
+                   const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1;
+                   const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2;
+                   const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2;
+                   const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
+                   const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
+                   const real_t abs_det_jac_blending = tmp_qloop_64;
+                   const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                   const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                   const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                   const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                   const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                   const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
+                   const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
+                   const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
+                   const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
+                   const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0);
+                   const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0);
+                   const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_0);
+                   const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136;
+                   const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142;
+                   const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144;
+                   const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145;
+                   const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147;
+                   const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135;
+                   const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157;
+                   const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0);
+                   const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_1);
+                   const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138;
+                   const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142;
+                   const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144;
+                   const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150;
+                   const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147;
+                   const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161;
+                   const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0);
+                   const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_2);
+                   const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140;
+                   const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142;
+                   const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144;
+                   const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154;
+                   const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147;
+                   const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67;
+                   const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67;
+                   const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78;
+                   const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81;
+                   const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84;
+                   const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85;
+                   const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84;
+                   const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86;
+                   const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88;
+                   const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95;
+                   const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95;
+                   const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93;
+                   const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92;
+                   const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96;
+                   const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0;
+                   const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97;
+                   const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0;
+                   const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98;
+                   const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99;
+                   const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0;
+                   const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0;
+                   const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137;
+                   const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137;
+                   const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137;
+                   const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137;
+                   const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137;
+                   const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137;
+                   const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137;
+                   const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137;
+                   const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137;
+                   const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139;
+                   const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139;
+                   const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139;
+                   const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139;
+                   const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139;
+                   const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139;
+                   const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139;
+                   const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139;
+                   const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139;
+                   const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139;
+                   const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141;
+                   const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141;
+                   const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141;
+                   const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141;
+                   const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141;
+                   const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141;
+                   const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141;
+                   const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141;
+                   const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141;
+                   const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141;
+                   const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143;
+                   const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143;
+                   const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143;
+                   const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143;
+                   const real_t q_tmp_1_4 = tmp_qloop_146;
+                   const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145;
+                   const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148;
+                   const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143;
+                   const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143;
+                   const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143;
+                   const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149;
+                   const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149;
+                   const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149;
+                   const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149;
+                   const real_t q_tmp_1_14 = tmp_qloop_151;
+                   const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150;
+                   const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152;
+                   const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149;
+                   const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149;
+                   const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149;
+                   const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153;
+                   const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153;
+                   const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153;
+                   const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153;
+                   const real_t q_tmp_1_24 = tmp_qloop_155;
+                   const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154;
+                   const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156;
+                   const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153;
+                   const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153;
+                   const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153;
+                   const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148;
+                   const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148;
+                   const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148;
+                   const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148;
+                   const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145;
+                   const real_t q_tmp_2_5 = tmp_qloop_146;
+                   const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158;
+                   const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148;
+                   const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148;
+                   const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148;
+                   const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152;
+                   const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152;
+                   const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152;
+                   const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152;
+                   const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150;
+                   const real_t q_tmp_2_15 = tmp_qloop_151;
+                   const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159;
+                   const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152;
+                   const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152;
+                   const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152;
+                   const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156;
+                   const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156;
+                   const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156;
+                   const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156;
+                   const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154;
+                   const real_t q_tmp_2_25 = tmp_qloop_155;
+                   const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159;
+                   const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156;
+                   const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156;
+                   const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156;
+                   const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145;
+                   const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145;
+                   const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145;
+                   const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145;
+                   const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160;
+                   const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160;
+                   const real_t q_tmp_3_6 = tmp_qloop_146;
+                   const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145;
+                   const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145;
+                   const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145;
+                   const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150;
+                   const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150;
+                   const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150;
+                   const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150;
+                   const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162;
+                   const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162;
+                   const real_t q_tmp_3_16 = tmp_qloop_151;
+                   const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150;
+                   const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150;
+                   const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150;
+                   const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154;
+                   const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154;
+                   const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154;
+                   const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154;
+                   const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163;
+                   const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163;
+                   const real_t q_tmp_3_26 = tmp_qloop_155;
+                   const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154;
+                   const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154;
+                   const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
+       const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
+       const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
+       const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
+       const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
+       const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
+       const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
+       const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
+       const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
+       const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
+       const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN;
+       const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN);
+       const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
+       const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
+       const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
+       const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN);
+       {
+          /* CellType.GREEN_DOWN */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21));
+                   const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22);
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28);
+                   const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46));
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66);
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27);
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43);
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43);
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32)));
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43);
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76);
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89);
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89);
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76);
+                   const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76);
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97);
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99);
+                   const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74);
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76);
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113);
+                   const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114);
+                   const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116);
+                   const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117));
+                   const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119);
+                   const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120);
+                   const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111);
+                   const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123);
+                   const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123);
+                   const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104);
+                   const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106);
+                   const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108);
+                   const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116);
+                   const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109);
+                   const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111);
+                   const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111);
+                   const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38);
+                   const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44);
+                   const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50);
+                   const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52);
+                   const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1);
+                   const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2);
+                   const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52);
+                   const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1);
+                   const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1));
+                   const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64);
+                   const __m256d abs_det_jac_blending = tmp_qloop_64;
+                   const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN));
+                   const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158);
+                   const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135);
+                   const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59));
+                   const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1)));
+                   const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60));
+                   const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN)))));
+                   const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136);
+                   const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142);
+                   const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144);
+                   const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145);
+                   const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147);
+                   const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135);
+                   const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157);
+                   const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN)))));
+                   const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138);
+                   const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142);
+                   const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144);
+                   const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150);
+                   const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147);
+                   const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161);
+                   const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                   const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN)))));
+                   const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140);
+                   const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142);
+                   const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144);
+                   const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154);
+                   const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147);
+                   const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161);
+                   const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84);
+                   const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84);
+                   const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52);
+                   const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92));
+                   const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44);
+                   const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50);
+                   const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143);
+                   const __m256d q_tmp_1_4 = tmp_qloop_146;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149);
+                   const __m256d q_tmp_1_14 = tmp_qloop_151;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153);
+                   const __m256d q_tmp_1_24 = tmp_qloop_155;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145);
+                   const __m256d q_tmp_2_5 = tmp_qloop_146;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150);
+                   const __m256d q_tmp_2_15 = tmp_qloop_151;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154);
+                   const __m256d q_tmp_2_25 = tmp_qloop_155;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160);
+                   const __m256d q_tmp_3_6 = tmp_qloop_146;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162);
+                   const __m256d q_tmp_3_16 = tmp_qloop_151;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163);
+                   const __m256d q_tmp_3_26 = tmp_qloop_155;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
+                   const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
+                   const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
+                   const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
+                   const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
+                   const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q];
+                   const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8;
+                   const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7);
+                   const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27;
+                   const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
+                   const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
+                   const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
+                   const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
+                   const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
+                   const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
+                   const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
+                   const real_t tmp_qloop_47 = -tmp_qloop_28;
+                   const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
+                   const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
+                   const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
+                   const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
+                   const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
+                   const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
+                   const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
+                   const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
+                   const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
+                   const real_t tmp_qloop_70 = -tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
+                   const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
+                   const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
+                   const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66;
+                   const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_25*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43;
+                   const real_t tmp_qloop_82 = tmp_qloop_22*2.0;
+                   const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82;
+                   const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78;
+                   const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22;
+                   const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43;
+                   const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69;
+                   const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43;
+                   const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76;
+                   const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28;
+                   const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89;
+                   const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82;
+                   const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89;
+                   const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68;
+                   const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76;
+                   const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69;
+                   const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76;
+                   const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79;
+                   const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97;
+                   const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99;
+                   const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74;
+                   const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76;
+                   const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_104 = tmp_qloop_103*2.0;
+                   const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_106 = tmp_qloop_105*2.0;
+                   const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_108 = tmp_qloop_107*2.0;
+                   const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q];
+                   const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q];
+                   const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q];
+                   const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113;
+                   const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116;
+                   const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109;
+                   const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0);
+                   const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119;
+                   const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120;
+                   const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0;
+                   const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111;
+                   const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123;
+                   const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123;
+                   const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q];
+                   const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q];
+                   const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q];
+                   const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116;
+                   const real_t tmp_qloop_131 = tmp_qloop_105*4.0;
+                   const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
+                   const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
+                   const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
+                   const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
+                   const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
+                   const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
+                   const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
+                   const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52;
+                   const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55;
+                   const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1;
+                   const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2;
+                   const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52;
+                   const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44;
+                   const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1;
+                   const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58;
+                   const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2;
+                   const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2;
+                   const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
+                   const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
+                   const real_t abs_det_jac_blending = tmp_qloop_64;
+                   const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                   const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                   const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                   const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                   const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                   const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
+                   const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
+                   const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
+                   const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
+                   const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0);
+                   const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0);
+                   const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0);
+                   const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_0);
+                   const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136;
+                   const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142;
+                   const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144;
+                   const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145;
+                   const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147;
+                   const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135;
+                   const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157;
+                   const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0);
+                   const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_1);
+                   const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138;
+                   const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142;
+                   const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144;
+                   const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150;
+                   const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147;
+                   const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161;
+                   const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0);
+                   const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_2);
+                   const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140;
+                   const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142;
+                   const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144;
+                   const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154;
+                   const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147;
+                   const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161;
+                   const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67;
+                   const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67;
+                   const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67;
+                   const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78;
+                   const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81;
+                   const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84;
+                   const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85;
+                   const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84;
+                   const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86;
+                   const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88;
+                   const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93;
+                   const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95;
+                   const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95;
+                   const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95;
+                   const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93;
+                   const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91;
+                   const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92;
+                   const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96;
+                   const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0;
+                   const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97;
+                   const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0;
+                   const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98;
+                   const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99;
+                   const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0;
+                   const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0;
+                   const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137;
+                   const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137;
+                   const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137;
+                   const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137;
+                   const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137;
+                   const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137;
+                   const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137;
+                   const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137;
+                   const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137;
+                   const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137;
+                   const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139;
+                   const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139;
+                   const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139;
+                   const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139;
+                   const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139;
+                   const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139;
+                   const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139;
+                   const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139;
+                   const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139;
+                   const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139;
+                   const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141;
+                   const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141;
+                   const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141;
+                   const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141;
+                   const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141;
+                   const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141;
+                   const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141;
+                   const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141;
+                   const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141;
+                   const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141;
+                   const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143;
+                   const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143;
+                   const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143;
+                   const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143;
+                   const real_t q_tmp_1_4 = tmp_qloop_146;
+                   const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145;
+                   const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148;
+                   const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143;
+                   const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143;
+                   const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143;
+                   const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149;
+                   const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149;
+                   const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149;
+                   const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149;
+                   const real_t q_tmp_1_14 = tmp_qloop_151;
+                   const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150;
+                   const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152;
+                   const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149;
+                   const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149;
+                   const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149;
+                   const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153;
+                   const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153;
+                   const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153;
+                   const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153;
+                   const real_t q_tmp_1_24 = tmp_qloop_155;
+                   const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154;
+                   const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156;
+                   const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153;
+                   const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153;
+                   const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153;
+                   const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148;
+                   const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148;
+                   const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148;
+                   const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148;
+                   const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145;
+                   const real_t q_tmp_2_5 = tmp_qloop_146;
+                   const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158;
+                   const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148;
+                   const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148;
+                   const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148;
+                   const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152;
+                   const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152;
+                   const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152;
+                   const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152;
+                   const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150;
+                   const real_t q_tmp_2_15 = tmp_qloop_151;
+                   const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159;
+                   const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152;
+                   const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152;
+                   const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152;
+                   const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156;
+                   const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156;
+                   const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156;
+                   const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156;
+                   const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154;
+                   const real_t q_tmp_2_25 = tmp_qloop_155;
+                   const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159;
+                   const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156;
+                   const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156;
+                   const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156;
+                   const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145;
+                   const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145;
+                   const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145;
+                   const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145;
+                   const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160;
+                   const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160;
+                   const real_t q_tmp_3_6 = tmp_qloop_146;
+                   const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145;
+                   const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145;
+                   const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145;
+                   const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150;
+                   const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150;
+                   const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150;
+                   const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150;
+                   const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162;
+                   const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162;
+                   const real_t q_tmp_3_16 = tmp_qloop_151;
+                   const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150;
+                   const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150;
+                   const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150;
+                   const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154;
+                   const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154;
+                   const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154;
+                   const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154;
+                   const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163;
+                   const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163;
+                   const real_t q_tmp_3_26 = tmp_qloop_155;
+                   const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154;
+                   const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154;
+                   const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
new file mode 100644
index 00000000..adea9287
--- /dev/null
+++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
@@ -0,0 +1,874 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7));
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_7)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)));
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_6),_mm256_mul_pd(rho_dof_1,tmp_qloop_12)),_mm256_mul_pd(rho_dof_2,tmp_qloop_13)),_mm256_mul_pd(rho_dof_3,tmp_qloop_1)),_mm256_mul_pd(rho_dof_4,tmp_qloop_14)),_mm256_mul_pd(rho_dof_5,tmp_qloop_16))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_18);
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_22);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_22);
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_24);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_17),tmp_qloop_4);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_6);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_19);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_19);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_19);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_19);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_19);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_6);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_21);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_21);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_21);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_21);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_21);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_6);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_23);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_23);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_25);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_23);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_23);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_6);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_26);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_26);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_27);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_26);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_26);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_6);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_25);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_25);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_28);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_25);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_25);
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_6);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_27);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_27);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_28);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_27);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_27);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_9 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = rho_dof_0*(tmp_qloop_0 + tmp_qloop_7 - 3.0);
+                   const real_t tmp_qloop_9 = rho_dof_1*(tmp_qloop_0 - 1.0) + rho_dof_3*tmp_qloop_7 - rho_dof_4*tmp_qloop_7 + rho_dof_5*(-tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_8;
+                   const real_t tmp_qloop_10 = rho_dof_2*(tmp_qloop_7 - 1.0) + rho_dof_3*tmp_qloop_0 + rho_dof_4*(-tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_0 + tmp_qloop_8;
+                   const real_t tmp_qloop_11 = jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_10;
+                   const real_t tmp_qloop_12 = tmp_qloop_3 - _data_q_p_0[q];
+                   const real_t tmp_qloop_13 = tmp_qloop_5 - _data_q_p_1[q];
+                   const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
+                   const real_t tmp_qloop_15 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15;
+                   const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
+                   const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
+                   const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18;
+                   const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10;
+                   const real_t tmp_qloop_21 = tmp_qloop_18*tmp_qloop_20;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*_data_q_p_0[q];
+                   const real_t tmp_qloop_23 = tmp_qloop_11*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_17*_data_q_p_1[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_11*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_20*tmp_qloop_22;
+                   const real_t tmp_qloop_27 = tmp_qloop_20*tmp_qloop_24;
+                   const real_t tmp_qloop_28 = tmp_qloop_0*tmp_qloop_17*tmp_qloop_4;
+                   const real_t q_tmp_0_0 = tmp_qloop_19*tmp_qloop_6;
+                   const real_t q_tmp_0_1 = tmp_qloop_12*tmp_qloop_19;
+                   const real_t q_tmp_0_2 = tmp_qloop_13*tmp_qloop_19;
+                   const real_t q_tmp_0_3 = tmp_qloop_1*tmp_qloop_19;
+                   const real_t q_tmp_0_4 = tmp_qloop_14*tmp_qloop_19;
+                   const real_t q_tmp_0_5 = tmp_qloop_16*tmp_qloop_19;
+                   const real_t q_tmp_0_6 = tmp_qloop_21*tmp_qloop_6;
+                   const real_t q_tmp_0_7 = tmp_qloop_12*tmp_qloop_21;
+                   const real_t q_tmp_0_8 = tmp_qloop_13*tmp_qloop_21;
+                   const real_t q_tmp_0_9 = tmp_qloop_1*tmp_qloop_21;
+                   const real_t q_tmp_0_10 = tmp_qloop_14*tmp_qloop_21;
+                   const real_t q_tmp_0_11 = tmp_qloop_16*tmp_qloop_21;
+                   const real_t q_tmp_1_0 = tmp_qloop_23*tmp_qloop_6;
+                   const real_t q_tmp_1_1 = tmp_qloop_12*tmp_qloop_23;
+                   const real_t q_tmp_1_2 = tmp_qloop_13*tmp_qloop_23;
+                   const real_t q_tmp_1_3 = tmp_qloop_15*tmp_qloop_25;
+                   const real_t q_tmp_1_4 = tmp_qloop_14*tmp_qloop_23;
+                   const real_t q_tmp_1_5 = tmp_qloop_16*tmp_qloop_23;
+                   const real_t q_tmp_1_6 = tmp_qloop_26*tmp_qloop_6;
+                   const real_t q_tmp_1_7 = tmp_qloop_12*tmp_qloop_26;
+                   const real_t q_tmp_1_8 = tmp_qloop_13*tmp_qloop_26;
+                   const real_t q_tmp_1_9 = tmp_qloop_15*tmp_qloop_27;
+                   const real_t q_tmp_1_10 = tmp_qloop_14*tmp_qloop_26;
+                   const real_t q_tmp_1_11 = tmp_qloop_16*tmp_qloop_26;
+                   const real_t q_tmp_2_0 = tmp_qloop_25*tmp_qloop_6;
+                   const real_t q_tmp_2_1 = tmp_qloop_12*tmp_qloop_25;
+                   const real_t q_tmp_2_2 = tmp_qloop_13*tmp_qloop_25;
+                   const real_t q_tmp_2_3 = tmp_qloop_11*tmp_qloop_28;
+                   const real_t q_tmp_2_4 = tmp_qloop_14*tmp_qloop_25;
+                   const real_t q_tmp_2_5 = tmp_qloop_16*tmp_qloop_25;
+                   const real_t q_tmp_2_6 = tmp_qloop_27*tmp_qloop_6;
+                   const real_t q_tmp_2_7 = tmp_qloop_12*tmp_qloop_27;
+                   const real_t q_tmp_2_8 = tmp_qloop_13*tmp_qloop_27;
+                   const real_t q_tmp_2_9 = tmp_qloop_20*tmp_qloop_28;
+                   const real_t q_tmp_2_10 = tmp_qloop_14*tmp_qloop_27;
+                   const real_t q_tmp_2_11 = tmp_qloop_16*tmp_qloop_27;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5);
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7));
+                   const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_7)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)));
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_6),_mm256_mul_pd(rho_dof_1,tmp_qloop_12)),_mm256_mul_pd(rho_dof_2,tmp_qloop_13)),_mm256_mul_pd(rho_dof_3,tmp_qloop_1)),_mm256_mul_pd(rho_dof_4,tmp_qloop_14)),_mm256_mul_pd(rho_dof_5,tmp_qloop_16))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_18);
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_22);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_24);
+                   const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_22);
+                   const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_24);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_17),tmp_qloop_4);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_6);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_19);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_19);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_19);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_19);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_19);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_6);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_21);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_21);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_21);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_21);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_21);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_6);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_23);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_23);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_25);
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_23);
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_23);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_6);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_26);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_26);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_27);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_26);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_26);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_6);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_25);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_25);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_28);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_25);
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_25);
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_6);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_27);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_27);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_28);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_27);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_27);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_9 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0;
+                   const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = rho_dof_0*(tmp_qloop_0 + tmp_qloop_7 - 3.0);
+                   const real_t tmp_qloop_9 = rho_dof_1*(tmp_qloop_0 - 1.0) + rho_dof_3*tmp_qloop_7 - rho_dof_4*tmp_qloop_7 + rho_dof_5*(-tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_8;
+                   const real_t tmp_qloop_10 = rho_dof_2*(tmp_qloop_7 - 1.0) + rho_dof_3*tmp_qloop_0 + rho_dof_4*(-tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_0 + tmp_qloop_8;
+                   const real_t tmp_qloop_11 = jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_10;
+                   const real_t tmp_qloop_12 = tmp_qloop_3 - _data_q_p_0[q];
+                   const real_t tmp_qloop_13 = tmp_qloop_5 - _data_q_p_1[q];
+                   const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
+                   const real_t tmp_qloop_15 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15;
+                   const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
+                   const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
+                   const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18;
+                   const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10;
+                   const real_t tmp_qloop_21 = tmp_qloop_18*tmp_qloop_20;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*_data_q_p_0[q];
+                   const real_t tmp_qloop_23 = tmp_qloop_11*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_17*_data_q_p_1[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_11*tmp_qloop_24;
+                   const real_t tmp_qloop_26 = tmp_qloop_20*tmp_qloop_22;
+                   const real_t tmp_qloop_27 = tmp_qloop_20*tmp_qloop_24;
+                   const real_t tmp_qloop_28 = tmp_qloop_0*tmp_qloop_17*tmp_qloop_4;
+                   const real_t q_tmp_0_0 = tmp_qloop_19*tmp_qloop_6;
+                   const real_t q_tmp_0_1 = tmp_qloop_12*tmp_qloop_19;
+                   const real_t q_tmp_0_2 = tmp_qloop_13*tmp_qloop_19;
+                   const real_t q_tmp_0_3 = tmp_qloop_1*tmp_qloop_19;
+                   const real_t q_tmp_0_4 = tmp_qloop_14*tmp_qloop_19;
+                   const real_t q_tmp_0_5 = tmp_qloop_16*tmp_qloop_19;
+                   const real_t q_tmp_0_6 = tmp_qloop_21*tmp_qloop_6;
+                   const real_t q_tmp_0_7 = tmp_qloop_12*tmp_qloop_21;
+                   const real_t q_tmp_0_8 = tmp_qloop_13*tmp_qloop_21;
+                   const real_t q_tmp_0_9 = tmp_qloop_1*tmp_qloop_21;
+                   const real_t q_tmp_0_10 = tmp_qloop_14*tmp_qloop_21;
+                   const real_t q_tmp_0_11 = tmp_qloop_16*tmp_qloop_21;
+                   const real_t q_tmp_1_0 = tmp_qloop_23*tmp_qloop_6;
+                   const real_t q_tmp_1_1 = tmp_qloop_12*tmp_qloop_23;
+                   const real_t q_tmp_1_2 = tmp_qloop_13*tmp_qloop_23;
+                   const real_t q_tmp_1_3 = tmp_qloop_15*tmp_qloop_25;
+                   const real_t q_tmp_1_4 = tmp_qloop_14*tmp_qloop_23;
+                   const real_t q_tmp_1_5 = tmp_qloop_16*tmp_qloop_23;
+                   const real_t q_tmp_1_6 = tmp_qloop_26*tmp_qloop_6;
+                   const real_t q_tmp_1_7 = tmp_qloop_12*tmp_qloop_26;
+                   const real_t q_tmp_1_8 = tmp_qloop_13*tmp_qloop_26;
+                   const real_t q_tmp_1_9 = tmp_qloop_15*tmp_qloop_27;
+                   const real_t q_tmp_1_10 = tmp_qloop_14*tmp_qloop_26;
+                   const real_t q_tmp_1_11 = tmp_qloop_16*tmp_qloop_26;
+                   const real_t q_tmp_2_0 = tmp_qloop_25*tmp_qloop_6;
+                   const real_t q_tmp_2_1 = tmp_qloop_12*tmp_qloop_25;
+                   const real_t q_tmp_2_2 = tmp_qloop_13*tmp_qloop_25;
+                   const real_t q_tmp_2_3 = tmp_qloop_11*tmp_qloop_28;
+                   const real_t q_tmp_2_4 = tmp_qloop_14*tmp_qloop_25;
+                   const real_t q_tmp_2_5 = tmp_qloop_16*tmp_qloop_25;
+                   const real_t q_tmp_2_6 = tmp_qloop_27*tmp_qloop_6;
+                   const real_t q_tmp_2_7 = tmp_qloop_12*tmp_qloop_27;
+                   const real_t q_tmp_2_8 = tmp_qloop_13*tmp_qloop_27;
+                   const real_t q_tmp_2_9 = tmp_qloop_20*tmp_qloop_28;
+                   const real_t q_tmp_2_10 = tmp_qloop_14*tmp_qloop_27;
+                   const real_t q_tmp_2_11 = tmp_qloop_16*tmp_qloop_27;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
new file mode 100644
index 00000000..d4b9475d
--- /dev/null
+++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
@@ -0,0 +1,6458 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_edge_2, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t * RESTRICT  _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
+   
+       const real_t _data_q_p_0 [] = {0.25, 0.16666666666666666, 0.16666666666666666, 0.5, 0.16666666666666666};
+   
+       const real_t _data_q_p_1 [] = {0.25, 0.16666666666666666, 0.5, 0.16666666666666666, 0.16666666666666666};
+   
+       const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
+   
+       const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
+       const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
+       const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
+       const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
+       const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
+       const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
+       const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
+       const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
+       const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
+       const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
+       const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
+       const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
+       const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP;
+       const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP);
+       const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
+       const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
+       const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
+       const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP);
+       {
+          /* CellType.WHITE_UP */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7);
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8));
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8);
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1);
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33);
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35);
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9);
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40);
+                   const __m256d q_tmp_1_4 = tmp_qloop_43;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46);
+                   const __m256d q_tmp_1_14 = tmp_qloop_48;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50);
+                   const __m256d q_tmp_1_24 = tmp_qloop_52;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42);
+                   const __m256d q_tmp_2_5 = tmp_qloop_43;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47);
+                   const __m256d q_tmp_2_15 = tmp_qloop_48;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51);
+                   const __m256d q_tmp_2_25 = tmp_qloop_52;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8);
+                   const __m256d q_tmp_3_6 = tmp_qloop_43;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8);
+                   const __m256d q_tmp_3_16 = tmp_qloop_48;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8);
+                   const __m256d q_tmp_3_26 = tmp_qloop_52;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_1 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q];
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q];
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6;
+                   const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6;
+                   const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0);
+                   const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16;
+                   const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17;
+                   const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0;
+                   const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8;
+                   const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20;
+                   const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_0_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_0_WHITE_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q];
+                   const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q];
+                   const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7;
+                   const real_t tmp_qloop_28 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
+                   const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
+                   const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_2_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_2_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_2_WHITE_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9;
+                   const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39;
+                   const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9;
+                   const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44;
+                   const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39;
+                   const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9;
+                   const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44;
+                   const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32;
+                   const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8;
+                   const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54;
+                   const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4;
+                   const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58;
+                   const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34;
+                   const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34;
+                   const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34;
+                   const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7;
+                   const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34;
+                   const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9;
+                   const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34;
+                   const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34;
+                   const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36;
+                   const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36;
+                   const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7;
+                   const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36;
+                   const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9;
+                   const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36;
+                   const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36;
+                   const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38;
+                   const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38;
+                   const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38;
+                   const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38;
+                   const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7;
+                   const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38;
+                   const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9;
+                   const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38;
+                   const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38;
+                   const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38;
+                   const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40;
+                   const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40;
+                   const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40;
+                   const real_t q_tmp_1_4 = tmp_qloop_43;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42;
+                   const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45;
+                   const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40;
+                   const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40;
+                   const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46;
+                   const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46;
+                   const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46;
+                   const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46;
+                   const real_t q_tmp_1_14 = tmp_qloop_48;
+                   const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47;
+                   const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49;
+                   const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46;
+                   const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46;
+                   const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46;
+                   const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50;
+                   const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50;
+                   const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50;
+                   const real_t q_tmp_1_24 = tmp_qloop_52;
+                   const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51;
+                   const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53;
+                   const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50;
+                   const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50;
+                   const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50;
+                   const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45;
+                   const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45;
+                   const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42;
+                   const real_t q_tmp_2_5 = tmp_qloop_43;
+                   const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45;
+                   const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45;
+                   const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49;
+                   const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49;
+                   const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49;
+                   const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49;
+                   const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47;
+                   const real_t q_tmp_2_15 = tmp_qloop_48;
+                   const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56;
+                   const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49;
+                   const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49;
+                   const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49;
+                   const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53;
+                   const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53;
+                   const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53;
+                   const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53;
+                   const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51;
+                   const real_t q_tmp_2_25 = tmp_qloop_52;
+                   const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56;
+                   const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53;
+                   const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53;
+                   const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53;
+                   const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42;
+                   const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42;
+                   const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42;
+                   const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6;
+                   const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8;
+                   const real_t q_tmp_3_6 = tmp_qloop_43;
+                   const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42;
+                   const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42;
+                   const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42;
+                   const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47;
+                   const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47;
+                   const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47;
+                   const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47;
+                   const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6;
+                   const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8;
+                   const real_t q_tmp_3_16 = tmp_qloop_48;
+                   const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47;
+                   const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47;
+                   const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47;
+                   const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51;
+                   const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51;
+                   const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51;
+                   const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51;
+                   const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60;
+                   const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8;
+                   const real_t q_tmp_3_26 = tmp_qloop_52;
+                   const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51;
+                   const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51;
+                   const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
+       const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
+       const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
+       const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
+       const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
+       const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
+       const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
+       const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
+       const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
+       const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN;
+       const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN);
+       const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
+       const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
+       const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
+       const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN);
+       {
+          /* CellType.WHITE_DOWN */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7);
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8));
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8);
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1);
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33);
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35);
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9);
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40);
+                   const __m256d q_tmp_1_4 = tmp_qloop_43;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46);
+                   const __m256d q_tmp_1_14 = tmp_qloop_48;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50);
+                   const __m256d q_tmp_1_24 = tmp_qloop_52;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42);
+                   const __m256d q_tmp_2_5 = tmp_qloop_43;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47);
+                   const __m256d q_tmp_2_15 = tmp_qloop_48;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51);
+                   const __m256d q_tmp_2_25 = tmp_qloop_52;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8);
+                   const __m256d q_tmp_3_6 = tmp_qloop_43;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8);
+                   const __m256d q_tmp_3_16 = tmp_qloop_48;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8);
+                   const __m256d q_tmp_3_26 = tmp_qloop_52;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_1 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q];
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q];
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6;
+                   const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6;
+                   const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0);
+                   const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16;
+                   const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17;
+                   const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0;
+                   const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8;
+                   const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20;
+                   const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_WHITE_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q];
+                   const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q];
+                   const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7;
+                   const real_t tmp_qloop_28 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
+                   const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
+                   const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_2_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_WHITE_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9;
+                   const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39;
+                   const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9;
+                   const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44;
+                   const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39;
+                   const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9;
+                   const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44;
+                   const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32;
+                   const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8;
+                   const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54;
+                   const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4;
+                   const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58;
+                   const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34;
+                   const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34;
+                   const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34;
+                   const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7;
+                   const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34;
+                   const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9;
+                   const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34;
+                   const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34;
+                   const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36;
+                   const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36;
+                   const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7;
+                   const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36;
+                   const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9;
+                   const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36;
+                   const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36;
+                   const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38;
+                   const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38;
+                   const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38;
+                   const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38;
+                   const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7;
+                   const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38;
+                   const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9;
+                   const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38;
+                   const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38;
+                   const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38;
+                   const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40;
+                   const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40;
+                   const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40;
+                   const real_t q_tmp_1_4 = tmp_qloop_43;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42;
+                   const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45;
+                   const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40;
+                   const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40;
+                   const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46;
+                   const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46;
+                   const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46;
+                   const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46;
+                   const real_t q_tmp_1_14 = tmp_qloop_48;
+                   const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47;
+                   const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49;
+                   const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46;
+                   const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46;
+                   const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46;
+                   const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50;
+                   const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50;
+                   const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50;
+                   const real_t q_tmp_1_24 = tmp_qloop_52;
+                   const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51;
+                   const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53;
+                   const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50;
+                   const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50;
+                   const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50;
+                   const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45;
+                   const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45;
+                   const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42;
+                   const real_t q_tmp_2_5 = tmp_qloop_43;
+                   const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45;
+                   const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45;
+                   const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49;
+                   const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49;
+                   const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49;
+                   const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49;
+                   const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47;
+                   const real_t q_tmp_2_15 = tmp_qloop_48;
+                   const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56;
+                   const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49;
+                   const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49;
+                   const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49;
+                   const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53;
+                   const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53;
+                   const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53;
+                   const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53;
+                   const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51;
+                   const real_t q_tmp_2_25 = tmp_qloop_52;
+                   const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56;
+                   const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53;
+                   const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53;
+                   const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53;
+                   const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42;
+                   const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42;
+                   const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42;
+                   const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6;
+                   const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8;
+                   const real_t q_tmp_3_6 = tmp_qloop_43;
+                   const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42;
+                   const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42;
+                   const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42;
+                   const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47;
+                   const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47;
+                   const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47;
+                   const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47;
+                   const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6;
+                   const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8;
+                   const real_t q_tmp_3_16 = tmp_qloop_48;
+                   const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47;
+                   const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47;
+                   const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47;
+                   const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51;
+                   const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51;
+                   const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51;
+                   const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51;
+                   const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60;
+                   const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8;
+                   const real_t q_tmp_3_26 = tmp_qloop_52;
+                   const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51;
+                   const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51;
+                   const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
+       const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
+       const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
+       const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
+       const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
+       const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
+       const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
+       const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
+       const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
+       const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
+       const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
+       const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
+       const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
+       const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP;
+       const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP);
+       const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
+       const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
+       const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
+       const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP);
+       {
+          /* CellType.BLUE_UP */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7);
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8));
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8);
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1);
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33);
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35);
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9);
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40);
+                   const __m256d q_tmp_1_4 = tmp_qloop_43;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46);
+                   const __m256d q_tmp_1_14 = tmp_qloop_48;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50);
+                   const __m256d q_tmp_1_24 = tmp_qloop_52;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42);
+                   const __m256d q_tmp_2_5 = tmp_qloop_43;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47);
+                   const __m256d q_tmp_2_15 = tmp_qloop_48;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51);
+                   const __m256d q_tmp_2_25 = tmp_qloop_52;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8);
+                   const __m256d q_tmp_3_6 = tmp_qloop_43;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8);
+                   const __m256d q_tmp_3_16 = tmp_qloop_48;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8);
+                   const __m256d q_tmp_3_26 = tmp_qloop_52;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_1 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q];
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q];
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6;
+                   const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6;
+                   const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0);
+                   const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16;
+                   const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17;
+                   const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0;
+                   const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8;
+                   const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20;
+                   const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_0_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_0_BLUE_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q];
+                   const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q];
+                   const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7;
+                   const real_t tmp_qloop_28 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
+                   const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
+                   const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_2_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_2_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_2_BLUE_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9;
+                   const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39;
+                   const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9;
+                   const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44;
+                   const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39;
+                   const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9;
+                   const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44;
+                   const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32;
+                   const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8;
+                   const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54;
+                   const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4;
+                   const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58;
+                   const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34;
+                   const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34;
+                   const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34;
+                   const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7;
+                   const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34;
+                   const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9;
+                   const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34;
+                   const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34;
+                   const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36;
+                   const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36;
+                   const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7;
+                   const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36;
+                   const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9;
+                   const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36;
+                   const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36;
+                   const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38;
+                   const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38;
+                   const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38;
+                   const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38;
+                   const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7;
+                   const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38;
+                   const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9;
+                   const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38;
+                   const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38;
+                   const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38;
+                   const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40;
+                   const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40;
+                   const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40;
+                   const real_t q_tmp_1_4 = tmp_qloop_43;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42;
+                   const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45;
+                   const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40;
+                   const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40;
+                   const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46;
+                   const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46;
+                   const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46;
+                   const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46;
+                   const real_t q_tmp_1_14 = tmp_qloop_48;
+                   const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47;
+                   const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49;
+                   const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46;
+                   const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46;
+                   const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46;
+                   const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50;
+                   const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50;
+                   const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50;
+                   const real_t q_tmp_1_24 = tmp_qloop_52;
+                   const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51;
+                   const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53;
+                   const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50;
+                   const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50;
+                   const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50;
+                   const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45;
+                   const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45;
+                   const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42;
+                   const real_t q_tmp_2_5 = tmp_qloop_43;
+                   const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45;
+                   const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45;
+                   const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49;
+                   const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49;
+                   const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49;
+                   const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49;
+                   const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47;
+                   const real_t q_tmp_2_15 = tmp_qloop_48;
+                   const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56;
+                   const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49;
+                   const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49;
+                   const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49;
+                   const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53;
+                   const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53;
+                   const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53;
+                   const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53;
+                   const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51;
+                   const real_t q_tmp_2_25 = tmp_qloop_52;
+                   const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56;
+                   const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53;
+                   const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53;
+                   const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53;
+                   const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42;
+                   const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42;
+                   const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42;
+                   const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6;
+                   const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8;
+                   const real_t q_tmp_3_6 = tmp_qloop_43;
+                   const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42;
+                   const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42;
+                   const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42;
+                   const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47;
+                   const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47;
+                   const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47;
+                   const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47;
+                   const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6;
+                   const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8;
+                   const real_t q_tmp_3_16 = tmp_qloop_48;
+                   const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47;
+                   const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47;
+                   const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47;
+                   const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51;
+                   const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51;
+                   const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51;
+                   const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51;
+                   const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60;
+                   const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8;
+                   const real_t q_tmp_3_26 = tmp_qloop_52;
+                   const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51;
+                   const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51;
+                   const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
+       const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
+       const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
+       const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
+       const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
+       const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
+       const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
+       const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN;
+       const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN);
+       const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
+       const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
+       const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
+       const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN);
+       {
+          /* CellType.BLUE_DOWN */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7);
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8));
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8);
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1);
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33);
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35);
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9);
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40);
+                   const __m256d q_tmp_1_4 = tmp_qloop_43;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46);
+                   const __m256d q_tmp_1_14 = tmp_qloop_48;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50);
+                   const __m256d q_tmp_1_24 = tmp_qloop_52;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42);
+                   const __m256d q_tmp_2_5 = tmp_qloop_43;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47);
+                   const __m256d q_tmp_2_15 = tmp_qloop_48;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51);
+                   const __m256d q_tmp_2_25 = tmp_qloop_52;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8);
+                   const __m256d q_tmp_3_6 = tmp_qloop_43;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8);
+                   const __m256d q_tmp_3_16 = tmp_qloop_48;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8);
+                   const __m256d q_tmp_3_26 = tmp_qloop_52;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_1 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q];
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q];
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6;
+                   const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6;
+                   const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0);
+                   const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16;
+                   const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17;
+                   const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0;
+                   const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8;
+                   const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20;
+                   const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_BLUE_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q];
+                   const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q];
+                   const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7;
+                   const real_t tmp_qloop_28 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
+                   const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
+                   const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_2_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_BLUE_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9;
+                   const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39;
+                   const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9;
+                   const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44;
+                   const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39;
+                   const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9;
+                   const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44;
+                   const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32;
+                   const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8;
+                   const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54;
+                   const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4;
+                   const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58;
+                   const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34;
+                   const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34;
+                   const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34;
+                   const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7;
+                   const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34;
+                   const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9;
+                   const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34;
+                   const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34;
+                   const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36;
+                   const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36;
+                   const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7;
+                   const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36;
+                   const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9;
+                   const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36;
+                   const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36;
+                   const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38;
+                   const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38;
+                   const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38;
+                   const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38;
+                   const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7;
+                   const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38;
+                   const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9;
+                   const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38;
+                   const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38;
+                   const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38;
+                   const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40;
+                   const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40;
+                   const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40;
+                   const real_t q_tmp_1_4 = tmp_qloop_43;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42;
+                   const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45;
+                   const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40;
+                   const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40;
+                   const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46;
+                   const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46;
+                   const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46;
+                   const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46;
+                   const real_t q_tmp_1_14 = tmp_qloop_48;
+                   const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47;
+                   const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49;
+                   const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46;
+                   const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46;
+                   const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46;
+                   const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50;
+                   const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50;
+                   const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50;
+                   const real_t q_tmp_1_24 = tmp_qloop_52;
+                   const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51;
+                   const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53;
+                   const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50;
+                   const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50;
+                   const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50;
+                   const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45;
+                   const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45;
+                   const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42;
+                   const real_t q_tmp_2_5 = tmp_qloop_43;
+                   const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45;
+                   const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45;
+                   const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49;
+                   const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49;
+                   const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49;
+                   const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49;
+                   const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47;
+                   const real_t q_tmp_2_15 = tmp_qloop_48;
+                   const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56;
+                   const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49;
+                   const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49;
+                   const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49;
+                   const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53;
+                   const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53;
+                   const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53;
+                   const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53;
+                   const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51;
+                   const real_t q_tmp_2_25 = tmp_qloop_52;
+                   const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56;
+                   const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53;
+                   const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53;
+                   const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53;
+                   const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42;
+                   const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42;
+                   const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42;
+                   const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6;
+                   const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8;
+                   const real_t q_tmp_3_6 = tmp_qloop_43;
+                   const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42;
+                   const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42;
+                   const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42;
+                   const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47;
+                   const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47;
+                   const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47;
+                   const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47;
+                   const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6;
+                   const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8;
+                   const real_t q_tmp_3_16 = tmp_qloop_48;
+                   const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47;
+                   const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47;
+                   const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47;
+                   const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51;
+                   const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51;
+                   const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51;
+                   const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51;
+                   const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60;
+                   const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8;
+                   const real_t q_tmp_3_26 = tmp_qloop_52;
+                   const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51;
+                   const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51;
+                   const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
+       const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
+       const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
+       const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
+       const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
+       const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
+       const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
+       const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
+       const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
+       const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
+       const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
+       const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
+       const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
+       const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
+       const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
+       const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
+       const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP;
+       const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP);
+       const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
+       const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
+       const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
+       const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP);
+       {
+          /* CellType.GREEN_UP */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7);
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8));
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8);
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1);
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33);
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35);
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9);
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40);
+                   const __m256d q_tmp_1_4 = tmp_qloop_43;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46);
+                   const __m256d q_tmp_1_14 = tmp_qloop_48;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50);
+                   const __m256d q_tmp_1_24 = tmp_qloop_52;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42);
+                   const __m256d q_tmp_2_5 = tmp_qloop_43;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47);
+                   const __m256d q_tmp_2_15 = tmp_qloop_48;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51);
+                   const __m256d q_tmp_2_25 = tmp_qloop_52;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8);
+                   const __m256d q_tmp_3_6 = tmp_qloop_43;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8);
+                   const __m256d q_tmp_3_16 = tmp_qloop_48;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8);
+                   const __m256d q_tmp_3_26 = tmp_qloop_52;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_1 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q];
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q];
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6;
+                   const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6;
+                   const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0);
+                   const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16;
+                   const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17;
+                   const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0;
+                   const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8;
+                   const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20;
+                   const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_0_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_0_GREEN_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q];
+                   const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q];
+                   const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7;
+                   const real_t tmp_qloop_28 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
+                   const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
+                   const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_2_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_2_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_2_GREEN_UP*tmp_qloop_22;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9;
+                   const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39;
+                   const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9;
+                   const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44;
+                   const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39;
+                   const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9;
+                   const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44;
+                   const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32;
+                   const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8;
+                   const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54;
+                   const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4;
+                   const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58;
+                   const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34;
+                   const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34;
+                   const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34;
+                   const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7;
+                   const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34;
+                   const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9;
+                   const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34;
+                   const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34;
+                   const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36;
+                   const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36;
+                   const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7;
+                   const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36;
+                   const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9;
+                   const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36;
+                   const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36;
+                   const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38;
+                   const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38;
+                   const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38;
+                   const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38;
+                   const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7;
+                   const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38;
+                   const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9;
+                   const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38;
+                   const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38;
+                   const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38;
+                   const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40;
+                   const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40;
+                   const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40;
+                   const real_t q_tmp_1_4 = tmp_qloop_43;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42;
+                   const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45;
+                   const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40;
+                   const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40;
+                   const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46;
+                   const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46;
+                   const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46;
+                   const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46;
+                   const real_t q_tmp_1_14 = tmp_qloop_48;
+                   const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47;
+                   const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49;
+                   const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46;
+                   const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46;
+                   const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46;
+                   const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50;
+                   const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50;
+                   const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50;
+                   const real_t q_tmp_1_24 = tmp_qloop_52;
+                   const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51;
+                   const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53;
+                   const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50;
+                   const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50;
+                   const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50;
+                   const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45;
+                   const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45;
+                   const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42;
+                   const real_t q_tmp_2_5 = tmp_qloop_43;
+                   const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45;
+                   const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45;
+                   const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49;
+                   const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49;
+                   const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49;
+                   const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49;
+                   const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47;
+                   const real_t q_tmp_2_15 = tmp_qloop_48;
+                   const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56;
+                   const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49;
+                   const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49;
+                   const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49;
+                   const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53;
+                   const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53;
+                   const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53;
+                   const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53;
+                   const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51;
+                   const real_t q_tmp_2_25 = tmp_qloop_52;
+                   const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56;
+                   const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53;
+                   const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53;
+                   const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53;
+                   const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42;
+                   const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42;
+                   const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42;
+                   const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6;
+                   const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8;
+                   const real_t q_tmp_3_6 = tmp_qloop_43;
+                   const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42;
+                   const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42;
+                   const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42;
+                   const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47;
+                   const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47;
+                   const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47;
+                   const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47;
+                   const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6;
+                   const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8;
+                   const real_t q_tmp_3_16 = tmp_qloop_48;
+                   const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47;
+                   const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47;
+                   const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47;
+                   const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51;
+                   const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51;
+                   const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51;
+                   const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51;
+                   const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60;
+                   const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8;
+                   const real_t q_tmp_3_26 = tmp_qloop_52;
+                   const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51;
+                   const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51;
+                   const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
+       const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
+       const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
+       const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
+       const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
+       const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
+       const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
+       const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
+       const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
+       const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
+       const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN;
+       const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN);
+       const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
+       const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
+       const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
+       const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN);
+       {
+          /* CellType.GREEN_DOWN */
+          for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
+          for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]);
+                const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]);
+                const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]);
+                const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]);
+                const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7);
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6);
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8));
+                   const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16);
+                   const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17);
+                   const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8);
+                   const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20);
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20);
+                   const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1);
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3);
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5);
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13);
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0));
+                   const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33);
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35);
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39);
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9);
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58);
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34);
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34);
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34);
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34);
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7);
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34);
+                   const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9);
+                   const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34);
+                   const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34);
+                   const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34);
+                   const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36);
+                   const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36);
+                   const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7);
+                   const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36);
+                   const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9);
+                   const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36);
+                   const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36);
+                   const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38);
+                   const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38);
+                   const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38);
+                   const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38);
+                   const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7);
+                   const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38);
+                   const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9);
+                   const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38);
+                   const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38);
+                   const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38);
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40);
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40);
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40);
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40);
+                   const __m256d q_tmp_1_4 = tmp_qloop_43;
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42);
+                   const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45);
+                   const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40);
+                   const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40);
+                   const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40);
+                   const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46);
+                   const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46);
+                   const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46);
+                   const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46);
+                   const __m256d q_tmp_1_14 = tmp_qloop_48;
+                   const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47);
+                   const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49);
+                   const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46);
+                   const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46);
+                   const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46);
+                   const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50);
+                   const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50);
+                   const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50);
+                   const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50);
+                   const __m256d q_tmp_1_24 = tmp_qloop_52;
+                   const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51);
+                   const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53);
+                   const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50);
+                   const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50);
+                   const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50);
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45);
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45);
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45);
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45);
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42);
+                   const __m256d q_tmp_2_5 = tmp_qloop_43;
+                   const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55);
+                   const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45);
+                   const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45);
+                   const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45);
+                   const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49);
+                   const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49);
+                   const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49);
+                   const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49);
+                   const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47);
+                   const __m256d q_tmp_2_15 = tmp_qloop_48;
+                   const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56);
+                   const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49);
+                   const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49);
+                   const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49);
+                   const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53);
+                   const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53);
+                   const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53);
+                   const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53);
+                   const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51);
+                   const __m256d q_tmp_2_25 = tmp_qloop_52;
+                   const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56);
+                   const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53);
+                   const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53);
+                   const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53);
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42);
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42);
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42);
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42);
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6);
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8);
+                   const __m256d q_tmp_3_6 = tmp_qloop_43;
+                   const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42);
+                   const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42);
+                   const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42);
+                   const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47);
+                   const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47);
+                   const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47);
+                   const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47);
+                   const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6);
+                   const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8);
+                   const __m256d q_tmp_3_16 = tmp_qloop_48;
+                   const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47);
+                   const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47);
+                   const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47);
+                   const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51);
+                   const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51);
+                   const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51);
+                   const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51);
+                   const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60);
+                   const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8);
+                   const __m256d q_tmp_3_26 = tmp_qloop_52;
+                   const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51);
+                   const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51);
+                   const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51);
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6);
+                   q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7);
+                   q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8);
+                   q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9);
+                   q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10);
+                   q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11);
+                   q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12);
+                   q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13);
+                   q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14);
+                   q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15);
+                   q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16);
+                   q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17);
+                   q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18);
+                   q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19);
+                   q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20);
+                   q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21);
+                   q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22);
+                   q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23);
+                   q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24);
+                   q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25);
+                   q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26);
+                   q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27);
+                   q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28);
+                   q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6);
+                   q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7);
+                   q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8);
+                   q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9);
+                   q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10);
+                   q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11);
+                   q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12);
+                   q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13);
+                   q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14);
+                   q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15);
+                   q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16);
+                   q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17);
+                   q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18);
+                   q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19);
+                   q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20);
+                   q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21);
+                   q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22);
+                   q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23);
+                   q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24);
+                   q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25);
+                   q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26);
+                   q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27);
+                   q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28);
+                   q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6);
+                   q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7);
+                   q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8);
+                   q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9);
+                   q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10);
+                   q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11);
+                   q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12);
+                   q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13);
+                   q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14);
+                   q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15);
+                   q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16);
+                   q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17);
+                   q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18);
+                   q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19);
+                   q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20);
+                   q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21);
+                   q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22);
+                   q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23);
+                   q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24);
+                   q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25);
+                   q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26);
+                   q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27);
+                   q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28);
+                   q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6);
+                   q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7);
+                   q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8);
+                   q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9);
+                   q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10);
+                   q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11);
+                   q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12);
+                   q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13);
+                   q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14);
+                   q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15);
+                   q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16);
+                   q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17);
+                   q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18);
+                   q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19);
+                   q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20);
+                   q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21);
+                   q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22);
+                   q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23);
+                   q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24);
+                   q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25);
+                   q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26);
+                   q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27);
+                   q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28);
+                   q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+                real_t _data_float_loop_ctr_array_dim_2[4];
+                _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+                _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_5 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_9 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_15 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_19 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_25 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t src_dof_29 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+                const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1];
+                const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))];
+                const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_0_6 = 0.0;
+                real_t q_acc_0_7 = 0.0;
+                real_t q_acc_0_8 = 0.0;
+                real_t q_acc_0_9 = 0.0;
+                real_t q_acc_0_10 = 0.0;
+                real_t q_acc_0_11 = 0.0;
+                real_t q_acc_0_12 = 0.0;
+                real_t q_acc_0_13 = 0.0;
+                real_t q_acc_0_14 = 0.0;
+                real_t q_acc_0_15 = 0.0;
+                real_t q_acc_0_16 = 0.0;
+                real_t q_acc_0_17 = 0.0;
+                real_t q_acc_0_18 = 0.0;
+                real_t q_acc_0_19 = 0.0;
+                real_t q_acc_0_20 = 0.0;
+                real_t q_acc_0_21 = 0.0;
+                real_t q_acc_0_22 = 0.0;
+                real_t q_acc_0_23 = 0.0;
+                real_t q_acc_0_24 = 0.0;
+                real_t q_acc_0_25 = 0.0;
+                real_t q_acc_0_26 = 0.0;
+                real_t q_acc_0_27 = 0.0;
+                real_t q_acc_0_28 = 0.0;
+                real_t q_acc_0_29 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_1_6 = 0.0;
+                real_t q_acc_1_7 = 0.0;
+                real_t q_acc_1_8 = 0.0;
+                real_t q_acc_1_9 = 0.0;
+                real_t q_acc_1_10 = 0.0;
+                real_t q_acc_1_11 = 0.0;
+                real_t q_acc_1_12 = 0.0;
+                real_t q_acc_1_13 = 0.0;
+                real_t q_acc_1_14 = 0.0;
+                real_t q_acc_1_15 = 0.0;
+                real_t q_acc_1_16 = 0.0;
+                real_t q_acc_1_17 = 0.0;
+                real_t q_acc_1_18 = 0.0;
+                real_t q_acc_1_19 = 0.0;
+                real_t q_acc_1_20 = 0.0;
+                real_t q_acc_1_21 = 0.0;
+                real_t q_acc_1_22 = 0.0;
+                real_t q_acc_1_23 = 0.0;
+                real_t q_acc_1_24 = 0.0;
+                real_t q_acc_1_25 = 0.0;
+                real_t q_acc_1_26 = 0.0;
+                real_t q_acc_1_27 = 0.0;
+                real_t q_acc_1_28 = 0.0;
+                real_t q_acc_1_29 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_2_6 = 0.0;
+                real_t q_acc_2_7 = 0.0;
+                real_t q_acc_2_8 = 0.0;
+                real_t q_acc_2_9 = 0.0;
+                real_t q_acc_2_10 = 0.0;
+                real_t q_acc_2_11 = 0.0;
+                real_t q_acc_2_12 = 0.0;
+                real_t q_acc_2_13 = 0.0;
+                real_t q_acc_2_14 = 0.0;
+                real_t q_acc_2_15 = 0.0;
+                real_t q_acc_2_16 = 0.0;
+                real_t q_acc_2_17 = 0.0;
+                real_t q_acc_2_18 = 0.0;
+                real_t q_acc_2_19 = 0.0;
+                real_t q_acc_2_20 = 0.0;
+                real_t q_acc_2_21 = 0.0;
+                real_t q_acc_2_22 = 0.0;
+                real_t q_acc_2_23 = 0.0;
+                real_t q_acc_2_24 = 0.0;
+                real_t q_acc_2_25 = 0.0;
+                real_t q_acc_2_26 = 0.0;
+                real_t q_acc_2_27 = 0.0;
+                real_t q_acc_2_28 = 0.0;
+                real_t q_acc_2_29 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_3_6 = 0.0;
+                real_t q_acc_3_7 = 0.0;
+                real_t q_acc_3_8 = 0.0;
+                real_t q_acc_3_9 = 0.0;
+                real_t q_acc_3_10 = 0.0;
+                real_t q_acc_3_11 = 0.0;
+                real_t q_acc_3_12 = 0.0;
+                real_t q_acc_3_13 = 0.0;
+                real_t q_acc_3_14 = 0.0;
+                real_t q_acc_3_15 = 0.0;
+                real_t q_acc_3_16 = 0.0;
+                real_t q_acc_3_17 = 0.0;
+                real_t q_acc_3_18 = 0.0;
+                real_t q_acc_3_19 = 0.0;
+                real_t q_acc_3_20 = 0.0;
+                real_t q_acc_3_21 = 0.0;
+                real_t q_acc_3_22 = 0.0;
+                real_t q_acc_3_23 = 0.0;
+                real_t q_acc_3_24 = 0.0;
+                real_t q_acc_3_25 = 0.0;
+                real_t q_acc_3_26 = 0.0;
+                real_t q_acc_3_27 = 0.0;
+                real_t q_acc_3_28 = 0.0;
+                real_t q_acc_3_29 = 0.0;
+                for (int64_t q = 0; q < 5; q += 1)
+                {
+                   const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]);
+                   const real_t tmp_qloop_1 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]);
+                   const real_t tmp_qloop_3 = tmp_qloop_2*2.0;
+                   const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]);
+                   const real_t tmp_qloop_5 = tmp_qloop_4*2.0;
+                   const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q];
+                   const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q];
+                   const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q];
+                   const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q];
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0;
+                   const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q];
+                   const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6;
+                   const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6;
+                   const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0);
+                   const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16;
+                   const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17;
+                   const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0;
+                   const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8;
+                   const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20;
+                   const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20;
+                   const real_t tmp_qloop_23 = jac_affine_inv_0_0_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_GREEN_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q];
+                   const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q];
+                   const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q];
+                   const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7;
+                   const real_t tmp_qloop_28 = tmp_qloop_2*4.0;
+                   const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
+                   const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
+                   const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
+                   const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
+                   const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35;
+                   const real_t tmp_qloop_37 = jac_affine_inv_0_2_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_GREEN_DOWN*tmp_qloop_22;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41;
+                   const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9;
+                   const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39;
+                   const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9;
+                   const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44;
+                   const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39;
+                   const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9;
+                   const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44;
+                   const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32;
+                   const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8;
+                   const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54;
+                   const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4;
+                   const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58;
+                   const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58;
+                   const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34;
+                   const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34;
+                   const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34;
+                   const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34;
+                   const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7;
+                   const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34;
+                   const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9;
+                   const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34;
+                   const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34;
+                   const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34;
+                   const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36;
+                   const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36;
+                   const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7;
+                   const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36;
+                   const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9;
+                   const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36;
+                   const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36;
+                   const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38;
+                   const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38;
+                   const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38;
+                   const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38;
+                   const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7;
+                   const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38;
+                   const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9;
+                   const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38;
+                   const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38;
+                   const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38;
+                   const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40;
+                   const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40;
+                   const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40;
+                   const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40;
+                   const real_t q_tmp_1_4 = tmp_qloop_43;
+                   const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42;
+                   const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45;
+                   const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40;
+                   const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40;
+                   const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40;
+                   const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46;
+                   const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46;
+                   const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46;
+                   const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46;
+                   const real_t q_tmp_1_14 = tmp_qloop_48;
+                   const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47;
+                   const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49;
+                   const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46;
+                   const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46;
+                   const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46;
+                   const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50;
+                   const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50;
+                   const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50;
+                   const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50;
+                   const real_t q_tmp_1_24 = tmp_qloop_52;
+                   const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51;
+                   const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53;
+                   const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50;
+                   const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50;
+                   const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50;
+                   const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45;
+                   const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45;
+                   const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45;
+                   const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45;
+                   const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42;
+                   const real_t q_tmp_2_5 = tmp_qloop_43;
+                   const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55;
+                   const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45;
+                   const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45;
+                   const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45;
+                   const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49;
+                   const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49;
+                   const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49;
+                   const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49;
+                   const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47;
+                   const real_t q_tmp_2_15 = tmp_qloop_48;
+                   const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56;
+                   const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49;
+                   const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49;
+                   const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49;
+                   const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53;
+                   const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53;
+                   const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53;
+                   const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53;
+                   const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51;
+                   const real_t q_tmp_2_25 = tmp_qloop_52;
+                   const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56;
+                   const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53;
+                   const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53;
+                   const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53;
+                   const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42;
+                   const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42;
+                   const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42;
+                   const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42;
+                   const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6;
+                   const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8;
+                   const real_t q_tmp_3_6 = tmp_qloop_43;
+                   const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42;
+                   const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42;
+                   const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42;
+                   const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47;
+                   const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47;
+                   const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47;
+                   const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47;
+                   const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6;
+                   const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8;
+                   const real_t q_tmp_3_16 = tmp_qloop_48;
+                   const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47;
+                   const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47;
+                   const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47;
+                   const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51;
+                   const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51;
+                   const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51;
+                   const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51;
+                   const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60;
+                   const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8;
+                   const real_t q_tmp_3_26 = tmp_qloop_52;
+                   const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51;
+                   const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51;
+                   const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51;
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_0_6 = q_acc_0_6 + q_tmp_0_6;
+                   q_acc_0_7 = q_acc_0_7 + q_tmp_0_7;
+                   q_acc_0_8 = q_acc_0_8 + q_tmp_0_8;
+                   q_acc_0_9 = q_acc_0_9 + q_tmp_0_9;
+                   q_acc_0_10 = q_acc_0_10 + q_tmp_0_10;
+                   q_acc_0_11 = q_acc_0_11 + q_tmp_0_11;
+                   q_acc_0_12 = q_acc_0_12 + q_tmp_0_12;
+                   q_acc_0_13 = q_acc_0_13 + q_tmp_0_13;
+                   q_acc_0_14 = q_acc_0_14 + q_tmp_0_14;
+                   q_acc_0_15 = q_acc_0_15 + q_tmp_0_15;
+                   q_acc_0_16 = q_acc_0_16 + q_tmp_0_16;
+                   q_acc_0_17 = q_acc_0_17 + q_tmp_0_17;
+                   q_acc_0_18 = q_acc_0_18 + q_tmp_0_18;
+                   q_acc_0_19 = q_acc_0_19 + q_tmp_0_19;
+                   q_acc_0_20 = q_acc_0_20 + q_tmp_0_20;
+                   q_acc_0_21 = q_acc_0_21 + q_tmp_0_21;
+                   q_acc_0_22 = q_acc_0_22 + q_tmp_0_22;
+                   q_acc_0_23 = q_acc_0_23 + q_tmp_0_23;
+                   q_acc_0_24 = q_acc_0_24 + q_tmp_0_24;
+                   q_acc_0_25 = q_acc_0_25 + q_tmp_0_25;
+                   q_acc_0_26 = q_acc_0_26 + q_tmp_0_26;
+                   q_acc_0_27 = q_acc_0_27 + q_tmp_0_27;
+                   q_acc_0_28 = q_acc_0_28 + q_tmp_0_28;
+                   q_acc_0_29 = q_acc_0_29 + q_tmp_0_29;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_1_6 = q_acc_1_6 + q_tmp_1_6;
+                   q_acc_1_7 = q_acc_1_7 + q_tmp_1_7;
+                   q_acc_1_8 = q_acc_1_8 + q_tmp_1_8;
+                   q_acc_1_9 = q_acc_1_9 + q_tmp_1_9;
+                   q_acc_1_10 = q_acc_1_10 + q_tmp_1_10;
+                   q_acc_1_11 = q_acc_1_11 + q_tmp_1_11;
+                   q_acc_1_12 = q_acc_1_12 + q_tmp_1_12;
+                   q_acc_1_13 = q_acc_1_13 + q_tmp_1_13;
+                   q_acc_1_14 = q_acc_1_14 + q_tmp_1_14;
+                   q_acc_1_15 = q_acc_1_15 + q_tmp_1_15;
+                   q_acc_1_16 = q_acc_1_16 + q_tmp_1_16;
+                   q_acc_1_17 = q_acc_1_17 + q_tmp_1_17;
+                   q_acc_1_18 = q_acc_1_18 + q_tmp_1_18;
+                   q_acc_1_19 = q_acc_1_19 + q_tmp_1_19;
+                   q_acc_1_20 = q_acc_1_20 + q_tmp_1_20;
+                   q_acc_1_21 = q_acc_1_21 + q_tmp_1_21;
+                   q_acc_1_22 = q_acc_1_22 + q_tmp_1_22;
+                   q_acc_1_23 = q_acc_1_23 + q_tmp_1_23;
+                   q_acc_1_24 = q_acc_1_24 + q_tmp_1_24;
+                   q_acc_1_25 = q_acc_1_25 + q_tmp_1_25;
+                   q_acc_1_26 = q_acc_1_26 + q_tmp_1_26;
+                   q_acc_1_27 = q_acc_1_27 + q_tmp_1_27;
+                   q_acc_1_28 = q_acc_1_28 + q_tmp_1_28;
+                   q_acc_1_29 = q_acc_1_29 + q_tmp_1_29;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_2_6 = q_acc_2_6 + q_tmp_2_6;
+                   q_acc_2_7 = q_acc_2_7 + q_tmp_2_7;
+                   q_acc_2_8 = q_acc_2_8 + q_tmp_2_8;
+                   q_acc_2_9 = q_acc_2_9 + q_tmp_2_9;
+                   q_acc_2_10 = q_acc_2_10 + q_tmp_2_10;
+                   q_acc_2_11 = q_acc_2_11 + q_tmp_2_11;
+                   q_acc_2_12 = q_acc_2_12 + q_tmp_2_12;
+                   q_acc_2_13 = q_acc_2_13 + q_tmp_2_13;
+                   q_acc_2_14 = q_acc_2_14 + q_tmp_2_14;
+                   q_acc_2_15 = q_acc_2_15 + q_tmp_2_15;
+                   q_acc_2_16 = q_acc_2_16 + q_tmp_2_16;
+                   q_acc_2_17 = q_acc_2_17 + q_tmp_2_17;
+                   q_acc_2_18 = q_acc_2_18 + q_tmp_2_18;
+                   q_acc_2_19 = q_acc_2_19 + q_tmp_2_19;
+                   q_acc_2_20 = q_acc_2_20 + q_tmp_2_20;
+                   q_acc_2_21 = q_acc_2_21 + q_tmp_2_21;
+                   q_acc_2_22 = q_acc_2_22 + q_tmp_2_22;
+                   q_acc_2_23 = q_acc_2_23 + q_tmp_2_23;
+                   q_acc_2_24 = q_acc_2_24 + q_tmp_2_24;
+                   q_acc_2_25 = q_acc_2_25 + q_tmp_2_25;
+                   q_acc_2_26 = q_acc_2_26 + q_tmp_2_26;
+                   q_acc_2_27 = q_acc_2_27 + q_tmp_2_27;
+                   q_acc_2_28 = q_acc_2_28 + q_tmp_2_28;
+                   q_acc_2_29 = q_acc_2_29 + q_tmp_2_29;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_3_6 = q_acc_3_6 + q_tmp_3_6;
+                   q_acc_3_7 = q_acc_3_7 + q_tmp_3_7;
+                   q_acc_3_8 = q_acc_3_8 + q_tmp_3_8;
+                   q_acc_3_9 = q_acc_3_9 + q_tmp_3_9;
+                   q_acc_3_10 = q_acc_3_10 + q_tmp_3_10;
+                   q_acc_3_11 = q_acc_3_11 + q_tmp_3_11;
+                   q_acc_3_12 = q_acc_3_12 + q_tmp_3_12;
+                   q_acc_3_13 = q_acc_3_13 + q_tmp_3_13;
+                   q_acc_3_14 = q_acc_3_14 + q_tmp_3_14;
+                   q_acc_3_15 = q_acc_3_15 + q_tmp_3_15;
+                   q_acc_3_16 = q_acc_3_16 + q_tmp_3_16;
+                   q_acc_3_17 = q_acc_3_17 + q_tmp_3_17;
+                   q_acc_3_18 = q_acc_3_18 + q_tmp_3_18;
+                   q_acc_3_19 = q_acc_3_19 + q_tmp_3_19;
+                   q_acc_3_20 = q_acc_3_20 + q_tmp_3_20;
+                   q_acc_3_21 = q_acc_3_21 + q_tmp_3_21;
+                   q_acc_3_22 = q_acc_3_22 + q_tmp_3_22;
+                   q_acc_3_23 = q_acc_3_23 + q_tmp_3_23;
+                   q_acc_3_24 = q_acc_3_24 + q_tmp_3_24;
+                   q_acc_3_25 = q_acc_3_25 + q_tmp_3_25;
+                   q_acc_3_26 = q_acc_3_26 + q_tmp_3_26;
+                   q_acc_3_27 = q_acc_3_27 + q_tmp_3_27;
+                   q_acc_3_28 = q_acc_3_28 + q_tmp_3_28;
+                   q_acc_3_29 = q_acc_3_29 + q_tmp_3_29;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9;
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
similarity index 78%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
index 918e7cb3..8e09db72 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
@@ -47,6 +47,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -60,7 +64,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
@@ -69,33 +73,55 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
    
        const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
    
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
        {
           /* FaceType.GRAY */
-          const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
-          const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
-          const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
-          const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
-          const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
-          const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
@@ -153,18 +179,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
              for (int64_t q = 0; q < 4; q += 1)
              {
                 const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
-                const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
                 const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
                 const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
                 const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
                 const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
                 const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
-                const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_8 = -tmp_qloop_7;
-                const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
-                const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
-                const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
-                const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
                 const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
                 const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
                 const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
@@ -203,11 +222,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
                 const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
                 const real_t tmp_qloop_52 = tmp_qloop_39*4.0;
                 const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52;
-                const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
-                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
-                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
-                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
-                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
                 const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
                 const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
@@ -215,6 +229,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
                 const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
                 const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
                 const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
+                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
+                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
+                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
+                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
                 const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
                 const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
@@ -316,37 +335,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
              _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
           }
        }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
        {
           /* FaceType.BLUE */
-          const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
-          const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
-          const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
-          const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
-          const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
-          const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
-          const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
-          const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
-          const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
-          const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
@@ -404,18 +438,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
              for (int64_t q = 0; q < 4; q += 1)
              {
                 const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
-                const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
                 const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
                 const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
                 const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
                 const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
                 const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
-                const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_8 = -tmp_qloop_7;
-                const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
-                const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
-                const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
-                const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
                 const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
                 const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
                 const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
@@ -454,11 +481,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
                 const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
                 const real_t tmp_qloop_52 = tmp_qloop_39*4.0;
                 const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52;
-                const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
-                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
-                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
-                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
-                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
                 const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
                 const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
@@ -466,6 +488,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t *
                 const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
                 const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
                 const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
+                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
+                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
+                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
+                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
                 const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
                 const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
similarity index 80%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
index 832f7305..e9465ae2 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp
@@ -47,6 +47,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -60,7 +64,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
@@ -69,33 +73,55 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
    
        const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
    
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
+       const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_8 = -tmp_qloop_7;
+       const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
+       const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
+       const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
+       const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
        {
           /* FaceType.GRAY */
-          const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
-          const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
-          const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
-          const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
-          const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
-          const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
@@ -141,18 +167,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
              for (int64_t q = 0; q < 4; q += 1)
              {
                 const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
-                const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
                 const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
                 const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
                 const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
                 const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
                 const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
-                const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_8 = -tmp_qloop_7;
-                const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
-                const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
-                const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
-                const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
                 const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
                 const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
                 const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
@@ -191,11 +210,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
                 const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
                 const real_t tmp_qloop_52 = tmp_qloop_39*4.0;
                 const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52;
-                const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
-                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
-                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
-                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
-                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
                 const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
                 const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
@@ -203,6 +217,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
                 const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
                 const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
                 const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
+                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
+                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
+                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
+                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
                 const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
                 const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
@@ -398,37 +417,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
        {
           /* FaceType.BLUE */
-          const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
-          const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
-          const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
-          const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
-          const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
-          const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
-          const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
-          const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
-          const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
-          const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
@@ -474,18 +508,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
              for (int64_t q = 0; q < 4; q += 1)
              {
                 const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q];
-                const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1;
                 const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0);
                 const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q];
                 const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
                 const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
                 const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000);
-                const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_8 = -tmp_qloop_7;
-                const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1));
-                const real_t tmp_qloop_10 = -radRayVertex + radRefVertex;
-                const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9;
-                const real_t tmp_qloop_12 = tmp_qloop_11*1.0;
                 const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6;
                 const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13;
                 const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000);
@@ -524,11 +551,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
                 const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44;
                 const real_t tmp_qloop_52 = tmp_qloop_39*4.0;
                 const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52;
-                const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
-                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
-                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
-                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
-                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4;
                 const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19;
                 const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3;
@@ -536,6 +558,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t
                 const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0;
                 const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21);
                 const real_t abs_det_jac_blending = tmp_qloop_21;
+                const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q];
+                const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
+                const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q];
+                const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q];
+                const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54;
                 const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22;
                 const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22;
                 const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22;
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
similarity index 82%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
index 77f66b1a..dc411f79 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
@@ -47,6 +47,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -60,7 +64,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_edge_2, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t * RESTRICT  _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_edge_2, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t * RESTRICT  _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
@@ -71,61 +75,110 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
    
        const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
    
+       const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
+       const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
+       const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
+       const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
+       const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
+       const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
+       const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
+       const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
+       const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
+       const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
+       const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
+       const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
+       const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP;
+       const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP);
+       const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
+       const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
+       const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
+       const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP);
+       const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_1 = -rayVertex_0;
+       const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
+       const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
+       const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
+       const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
+       const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
+       const real_t tmp_qloop_8 = -rayVertex_1;
+       const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
+       const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = -rayVertex_2;
+       const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
+       const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
+       const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
+       const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
+       const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
+       const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
+       const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
+       const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
+       const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
+       const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+       const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
+       const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
+       const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
+       const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
+       const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
        {
           /* CellType.WHITE_UP */
-          const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
-          const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
-          const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
-          const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
-          const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
-          const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
-          const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
-          const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
-          const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
-          const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
-          const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
-          const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
-          const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
-          const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
-          const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
-          const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
@@ -288,28 +341,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -320,28 +353,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -349,11 +375,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -415,14 +439,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -440,6 +457,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -749,73 +773,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
           }
        }
+       const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
+       const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
+       const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
+       const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
+       const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
+       const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
+       const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
+       const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
+       const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
+       const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN;
+       const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN);
+       const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
+       const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
+       const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
+       const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN);
        {
           /* CellType.WHITE_DOWN */
-          const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
-          const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
-          const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
-          const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
-          const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
-          const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
-          const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
-          const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
-          const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
-          const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
-          const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
-          const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -978,28 +1022,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -1010,28 +1034,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -1039,11 +1056,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -1105,14 +1120,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -1130,6 +1138,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -1439,67 +1454,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
+       const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
+       const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
+       const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
+       const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
+       const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
+       const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
+       const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
+       const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
+       const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
+       const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
+       const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
+       const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
+       const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP;
+       const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP);
+       const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
+       const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
+       const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
+       const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP);
        {
           /* CellType.BLUE_UP */
-          const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
-          const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
-          const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
-          const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
-          const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
-          const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
-          const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
-          const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
-          const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
-          const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
-          const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
-          const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
-          const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
-          const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
-          const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
-          const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
-          const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -1662,28 +1697,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -1694,28 +1709,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -1723,11 +1731,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -1789,14 +1795,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -1814,6 +1813,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -2123,70 +2129,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
+       const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
+       const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
+       const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
+       const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
+       const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
+       const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
+       const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN;
+       const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN);
+       const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
+       const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
+       const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
+       const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN);
        {
           /* CellType.BLUE_DOWN */
-          const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
-          const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
-          const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
-          const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
-          const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
-          const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
-          const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
-          const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
-          const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
-          const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -2349,28 +2375,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -2381,28 +2387,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -2410,11 +2409,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -2476,14 +2473,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -2501,6 +2491,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -2810,67 +2807,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
+       const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
+       const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
+       const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
+       const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
+       const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
+       const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
+       const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
+       const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
+       const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
+       const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
+       const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
+       const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
+       const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
+       const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
+       const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
+       const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP;
+       const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP);
+       const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
+       const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
+       const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
+       const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP);
        {
           /* CellType.GREEN_UP */
-          const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
-          const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
-          const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
-          const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
-          const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
-          const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
-          const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
-          const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
-          const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
-          const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
-          const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
-          const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
-          const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
-          const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
-          const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
-          const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
-          const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
-          const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
-          const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
-          const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -3033,28 +3050,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -3065,28 +3062,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -3094,11 +3084,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -3160,14 +3148,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -3185,6 +3166,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -3494,70 +3482,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
+       const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
+       const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
+       const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
+       const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
+       const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
+       const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
+       const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
+       const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
+       const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
+       const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN;
+       const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN);
+       const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
+       const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
+       const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
+       const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN);
        {
           /* CellType.GREEN_DOWN */
-          const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
-          const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
-          const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
-          const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
-          const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
-          const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
-          const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
-          const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
-          const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
-          const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
-          const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
-          const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
-          const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -3720,28 +3728,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -3752,28 +3740,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -3781,11 +3762,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -3847,14 +3826,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -3872,6 +3844,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D(
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
similarity index 84%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
index 05c564b4..06750a93 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp
@@ -47,6 +47,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -60,7 +64,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_edge_2, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, idx_t * RESTRICT  _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_edge_2, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, idx_t * RESTRICT  _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
@@ -71,61 +75,110 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
    
        const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
    
+       const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
+       const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
+       const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
+       const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
+       const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
+       const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
+       const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
+       const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
+       const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
+       const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
+       const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
+       const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
+       const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP;
+       const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP);
+       const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
+       const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
+       const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
+       const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP);
+       const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_1 = -rayVertex_0;
+       const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
+       const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
+       const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
+       const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
+       const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
+       const real_t tmp_qloop_8 = -rayVertex_1;
+       const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
+       const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = -rayVertex_2;
+       const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
+       const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
+       const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
+       const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
+       const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
+       const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
+       const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
+       const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
+       const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
+       const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+       const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
+       const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
+       const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
+       const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
+       const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
        {
           /* CellType.WHITE_UP */
-          const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
-          const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
-          const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
-          const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
-          const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
-          const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
-          const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
-          const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
-          const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
-          const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
-          const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
-          const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
-          const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
-          const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
-          const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
-          const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
@@ -258,28 +311,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -290,28 +323,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -319,11 +345,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -385,14 +409,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -410,6 +427,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -998,73 +1022,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
+       const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
+       const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
+       const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
+       const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
+       const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
+       const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
+       const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
+       const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
+       const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN;
+       const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN);
+       const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
+       const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
+       const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
+       const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN);
        {
           /* CellType.WHITE_DOWN */
-          const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
-          const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
-          const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
-          const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
-          const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
-          const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
-          const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
-          const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
-          const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
-          const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
-          const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
-          const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -1197,28 +1241,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -1229,28 +1253,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -1258,11 +1275,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -1324,14 +1339,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -1349,6 +1357,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -1937,67 +1952,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
+       const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
+       const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
+       const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
+       const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
+       const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
+       const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
+       const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
+       const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
+       const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
+       const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
+       const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
+       const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
+       const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP;
+       const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP);
+       const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
+       const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
+       const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
+       const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP);
        {
           /* CellType.BLUE_UP */
-          const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
-          const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
-          const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
-          const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
-          const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
-          const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
-          const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
-          const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
-          const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
-          const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
-          const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
-          const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
-          const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
-          const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
-          const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
-          const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
-          const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -2130,28 +2165,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -2162,28 +2177,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -2191,11 +2199,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -2257,14 +2263,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -2282,6 +2281,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -2870,70 +2876,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
+       const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
+       const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
+       const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
+       const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
+       const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
+       const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
+       const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN;
+       const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN);
+       const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
+       const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
+       const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
+       const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN);
        {
           /* CellType.BLUE_DOWN */
-          const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
-          const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
-          const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
-          const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
-          const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
-          const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
-          const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
-          const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
-          const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
-          const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -3066,28 +3092,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -3098,28 +3104,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -3127,11 +3126,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -3193,14 +3190,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -3218,6 +3208,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -3806,67 +3803,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
+       const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
+       const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
+       const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
+       const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
+       const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
+       const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
+       const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
+       const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
+       const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
+       const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
+       const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
+       const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
+       const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
+       const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
+       const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
+       const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP;
+       const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP);
+       const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
+       const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
+       const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
+       const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP);
        {
           /* CellType.GREEN_UP */
-          const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
-          const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
-          const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
-          const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
-          const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
-          const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
-          const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
-          const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
-          const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
-          const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
-          const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
-          const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
-          const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
-          const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
-          const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
-          const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
-          const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
-          const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
-          const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
-          const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -3999,28 +4016,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -4031,28 +4028,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -4060,11 +4050,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -4126,14 +4114,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -4151,6 +4132,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
@@ -4739,70 +4727,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
+       const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
+       const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
+       const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
+       const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
+       const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
+       const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
+       const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
+       const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
+       const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
+       const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN;
+       const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN);
+       const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
+       const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
+       const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
+       const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN);
        {
           /* CellType.GREEN_DOWN */
-          const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
-          const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
-          const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
-          const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
-          const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
-          const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
-          const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
-          const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
-          const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
-          const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
-          const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
-          const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
-          const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -4935,28 +4943,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
              real_t q_acc_3_29 = 0.0;
              for (int64_t q = 0; q < 5; q += 1)
              {
-                const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1;
-                const real_t tmp_qloop_1 = -rayVertex_0;
-                const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1;
-                const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2;
-                const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3;
-                const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2;
-                const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1;
-                const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6;
-                const real_t tmp_qloop_8 = -rayVertex_1;
-                const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8;
-                const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0;
-                const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
-                const real_t tmp_qloop_12 = -rayVertex_2;
-                const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12;
-                const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9;
-                const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13;
-                const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7;
-                const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16);
                 const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q];
                 const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18);
-                const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16;
-                const real_t tmp_qloop_21 = radRayVertex - radRefVertex;
                 const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q];
                 const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22;
                 const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18;
@@ -4967,28 +4955,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25);
                 const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22);
                 const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30;
-                const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14;
-                const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33;
                 const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34;
                 const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35);
                 const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000);
                 const real_t tmp_qloop_38 = tmp_qloop_37*1.0;
-                const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4;
-                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40;
                 const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41;
                 const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38;
                 const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43;
-                const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7;
-                const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_47 = -tmp_qloop_28;
                 const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47;
                 const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48;
                 const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
                 const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33;
                 const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51;
-                const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39;
                 const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28;
                 const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54;
                 const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46;
@@ -4996,11 +4977,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57;
                 const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0;
                 const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66;
-                const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32;
                 const real_t tmp_qloop_69 = tmp_qloop_18*2.0;
                 const real_t tmp_qloop_70 = -tmp_qloop_41;
                 const real_t tmp_qloop_71 = tmp_qloop_35*2.0;
-                const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45;
                 const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72;
                 const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27;
                 const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74;
@@ -5062,14 +5041,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131;
                 const real_t tmp_qloop_133 = tmp_qloop_103*4.0;
                 const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133;
-                const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
-                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
-                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
-                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
-                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
                 const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111;
-                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
-                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38;
                 const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44;
                 const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50;
@@ -5087,6 +5059,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_
                 const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63;
                 const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64);
                 const real_t abs_det_jac_blending = tmp_qloop_64;
+                const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q];
+                const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
+                const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q];
+                const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q];
+                const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q];
+                const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158;
+                const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135;
                 const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61);
                 const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62);
                 const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63);
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
similarity index 74%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
index a161a3a8..f1f6f39b 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
@@ -67,33 +71,48 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT
    
        const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
    
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
        {
           /* FaceType.GRAY */
-          const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
-          const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
-          const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
-          const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
-          const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
-          const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
@@ -167,7 +186,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT
                 const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
                 const real_t tmp_qloop_15 = tmp_qloop_2*4.0;
                 const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15;
-                const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
+                const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
                 const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
                 const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18;
                 const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10;
@@ -260,37 +279,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT
              _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
           }
        }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
        {
           /* FaceType.BLUE */
-          const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
-          const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
-          const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
-          const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
-          const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
-          const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
-          const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
-          const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
-          const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
-          const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
@@ -364,7 +398,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT
                 const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
                 const real_t tmp_qloop_15 = tmp_qloop_2*4.0;
                 const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15;
-                const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
+                const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
                 const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
                 const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18;
                 const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10;
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
similarity index 78%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
index 45073514..f8bf62a3 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_edge_2, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t * RESTRICT  _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, real_t * RESTRICT  _data_src_edge_0, real_t * RESTRICT  _data_src_edge_1, real_t * RESTRICT  _data_src_edge_2, real_t * RESTRICT  _data_src_vertex_0, real_t * RESTRICT  _data_src_vertex_1, real_t * RESTRICT  _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
@@ -69,61 +73,81 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
    
        const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
    
+       const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
+       const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
+       const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
+       const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
+       const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
+       const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
+       const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
+       const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
+       const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
+       const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
+       const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
+       const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
+       const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP;
+       const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP);
+       const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
+       const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
+       const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
+       const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP);
        {
           /* CellType.WHITE_UP */
-          const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
-          const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
-          const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
-          const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
-          const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
-          const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
-          const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
-          const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
-          const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
-          const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
-          const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
-          const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
-          const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
-          const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
-          const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
-          const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
@@ -318,7 +342,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22;
@@ -598,73 +622,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
              _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
           }
        }
+       const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
+       const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
+       const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
+       const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
+       const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
+       const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
+       const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
+       const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
+       const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
+       const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN;
+       const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN);
+       const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
+       const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
+       const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
+       const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN);
        {
           /* CellType.WHITE_DOWN */
-          const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
-          const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
-          const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
-          const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
-          const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
-          const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
-          const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
-          const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
-          const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
-          const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
-          const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
-          const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -859,7 +903,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22;
@@ -1139,67 +1183,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
              _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
+       const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
+       const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
+       const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
+       const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
+       const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
+       const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
+       const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
+       const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
+       const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
+       const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
+       const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
+       const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
+       const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP;
+       const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP);
+       const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
+       const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
+       const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
+       const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP);
        {
           /* CellType.BLUE_UP */
-          const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
-          const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
-          const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
-          const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
-          const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
-          const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
-          const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
-          const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
-          const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
-          const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
-          const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
-          const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
-          const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
-          const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
-          const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
-          const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
-          const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -1394,7 +1458,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22;
@@ -1674,70 +1738,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
              _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
+       const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
+       const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
+       const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
+       const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
+       const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
+       const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
+       const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN;
+       const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN);
+       const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
+       const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
+       const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
+       const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN);
        {
           /* CellType.BLUE_DOWN */
-          const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
-          const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
-          const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
-          const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
-          const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
-          const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
-          const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
-          const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
-          const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
-          const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -1932,7 +2016,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22;
@@ -2212,67 +2296,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
              _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
+       const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
+       const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
+       const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
+       const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
+       const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
+       const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
+       const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
+       const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
+       const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
+       const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
+       const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
+       const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
+       const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
+       const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
+       const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
+       const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP;
+       const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP);
+       const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
+       const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
+       const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
+       const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP);
        {
           /* CellType.GREEN_UP */
-          const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
-          const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
-          const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
-          const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
-          const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
-          const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
-          const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
-          const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
-          const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
-          const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
-          const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
-          const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
-          const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
-          const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
-          const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
-          const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
-          const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
-          const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
-          const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
-          const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -2467,7 +2571,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22;
@@ -2747,70 +2851,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
              _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
+       const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
+       const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
+       const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
+       const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
+       const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
+       const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
+       const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
+       const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
+       const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
+       const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN;
+       const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN);
+       const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
+       const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
+       const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
+       const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN);
        {
           /* CellType.GREEN_DOWN */
-          const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
-          const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
-          const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
-          const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
-          const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
-          const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
-          const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
-          const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
-          const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
-          const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
-          const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
-          const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
-          const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -3005,7 +3129,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22;
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
similarity index 77%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
index 8d57d088..d1381740 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
@@ -67,33 +71,48 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC
    
        const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
    
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
        {
           /* FaceType.GRAY */
-          const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
-          const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
-          const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
-          const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
-          const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
-          const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY;
-          const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
@@ -155,7 +174,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC
                 const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
                 const real_t tmp_qloop_15 = tmp_qloop_2*4.0;
                 const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15;
-                const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
+                const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
                 const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
                 const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18;
                 const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10;
@@ -342,37 +361,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
        {
           /* FaceType.BLUE */
-          const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
-          const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
-          const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
-          const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
-          const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
-          const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
-          const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
-          const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
-          const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
-          const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
-          const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE;
-          const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE;
           for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
@@ -434,7 +468,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC
                 const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7;
                 const real_t tmp_qloop_15 = tmp_qloop_2*4.0;
                 const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15;
-                const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
+                const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q];
                 const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]);
                 const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18;
                 const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10;
diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
similarity index 81%
rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp
rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
index 0945e0d9..a624289c 100644
--- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp
+++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_edge_2, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, idx_t * RESTRICT  _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_rhoEdge, real_t * RESTRICT  _data_rhoVertex, idx_t * RESTRICT  _data_src_edge_0, idx_t * RESTRICT  _data_src_edge_1, idx_t * RESTRICT  _data_src_edge_2, idx_t * RESTRICT  _data_src_vertex_0, idx_t * RESTRICT  _data_src_vertex_1, idx_t * RESTRICT  _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
@@ -69,61 +73,81 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
    
        const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
    
+       const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
+       const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
+       const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
+       const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
+       const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
+       const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
+       const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
+       const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
+       const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
+       const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
+       const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
+       const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
+       const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
+       const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP;
+       const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP);
+       const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
+       const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
+       const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
+       const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
+       const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
+       const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP);
        {
           /* CellType.WHITE_UP */
-          const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0;
-          const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1;
-          const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2;
-          const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP;
-          const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP;
-          const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP;
-          const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP;
-          const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP;
-          const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP;
-          const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP;
-          const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP;
-          const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP;
-          const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP;
-          const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP;
-          const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP;
-          const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP);
-          const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP);
-          const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP);
-          const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP);
-          const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP);
-          const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
@@ -288,7 +312,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22;
@@ -847,73 +871,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
+       const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
+       const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
+       const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
+       const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
+       const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
+       const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
+       const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
+       const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
+       const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
+       const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
+       const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
+       const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
+       const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
+       const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
+       const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
+       const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
+       const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
+       const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN;
+       const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN);
+       const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
+       const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
+       const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
+       const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
+       const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
+       const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN);
        {
           /* CellType.WHITE_DOWN */
-          const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN;
-          const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN;
-          const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN;
-          const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN;
-          const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN;
-          const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN;
-          const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN;
-          const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN;
-          const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN;
-          const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN;
-          const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN;
-          const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN;
-          const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN;
-          const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN;
-          const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN;
-          const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN;
-          const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN;
-          const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN;
-          const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN);
-          const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN);
-          const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN);
-          const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN);
-          const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -1078,7 +1122,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22;
@@ -1637,67 +1681,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
+       const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
+       const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
+       const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
+       const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
+       const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
+       const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
+       const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
+       const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
+       const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
+       const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
+       const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
+       const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
+       const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
+       const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
+       const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
+       const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
+       const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
+       const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
+       const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP;
+       const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP);
+       const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
+       const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
+       const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
+       const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
+       const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
+       const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP);
        {
           /* CellType.BLUE_UP */
-          const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP;
-          const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP;
-          const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP;
-          const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP;
-          const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP;
-          const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP;
-          const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP;
-          const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP;
-          const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP;
-          const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP;
-          const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP;
-          const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP;
-          const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP;
-          const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP;
-          const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP;
-          const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP;
-          const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP;
-          const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP;
-          const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP;
-          const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP);
-          const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP);
-          const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP);
-          const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP);
-          const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP);
-          const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -1862,7 +1926,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22;
@@ -2421,70 +2485,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
+       const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
+       const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
+       const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
+       const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
+       const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
+       const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
+       const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
+       const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
+       const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
+       const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
+       const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
+       const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
+       const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
+       const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
+       const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
+       const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
+       const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN;
+       const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN);
+       const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
+       const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
+       const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
+       const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
+       const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
+       const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN);
        {
           /* CellType.BLUE_DOWN */
-          const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN;
-          const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN;
-          const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN;
-          const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN;
-          const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN;
-          const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN;
-          const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN;
-          const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN;
-          const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN;
-          const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN;
-          const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN;
-          const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN;
-          const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN;
-          const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN;
-          const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN;
-          const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN;
-          const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN;
-          const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN);
-          const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN);
-          const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN);
-          const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN);
-          const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -2649,7 +2733,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22;
@@ -3208,67 +3292,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
+       const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
+       const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
+       const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
+       const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
+       const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
+       const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
+       const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
+       const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
+       const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
+       const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
+       const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
+       const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
+       const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
+       const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
+       const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
+       const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
+       const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
+       const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
+       const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP;
+       const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP);
+       const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
+       const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
+       const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
+       const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
+       const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
+       const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP);
        {
           /* CellType.GREEN_UP */
-          const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP;
-          const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP;
-          const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP;
-          const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP;
-          const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP;
-          const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP;
-          const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP;
-          const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP;
-          const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP;
-          const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP;
-          const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP;
-          const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP;
-          const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP;
-          const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP;
-          const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP;
-          const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP;
-          const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP;
-          const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP;
-          const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP;
-          const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP);
-          const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP);
-          const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP);
-          const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP);
-          const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP);
-          const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))];
@@ -3433,7 +3537,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22;
@@ -3992,70 +4096,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
              mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
           }
        }
+       const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
+       const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
+       const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
+       const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
+       const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
+       const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
+       const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
+       const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
+       const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
+       const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
+       const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
+       const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
+       const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
+       const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
+       const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
+       const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
+       const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
+       const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
+       const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
+       const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
+       const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
+       const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
+       const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN;
+       const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN);
+       const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
+       const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
+       const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
+       const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
+       const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
+       const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN);
        {
           /* CellType.GREEN_DOWN */
-          const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
-          const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
-          const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
-          const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2);
-          const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
-          const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
-          const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2);
-          const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0);
-          const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1);
-          const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2);
-          const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN;
-          const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN;
-          const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN;
-          const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN;
-          const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN;
-          const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN;
-          const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN;
-          const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN;
-          const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN;
-          const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN;
-          const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN;
-          const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN;
-          const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN;
-          const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN;
-          const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN;
-          const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN;
-          const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN;
-          const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN);
-          const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN);
-          const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN);
-          const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN);
-          const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN);
           for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1)
           for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1)
           for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
           {
-             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2));
-             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1));
-             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1));
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+             real_t _data_float_loop_ctr_array_dim_2[4];
+             _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2;
+             _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0;
              const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))];
              const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1];
              const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1];
@@ -4220,7 +4344,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC
                 const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9;
                 const real_t tmp_qloop_30 = tmp_qloop_0*4.0;
                 const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8;
-                const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
+                const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q];
                 const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]);
                 const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33;
                 const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22;
diff --git a/operators/gradient/CMakeLists.txt b/operators/gradient/CMakeLists.txt
index 0c37a89a..6d3cdf34 100644
--- a/operators/gradient/CMakeLists.txt
+++ b/operators/gradient/CMakeLists.txt
@@ -21,40 +21,40 @@ add_library( opgen-gradient
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-gradient PRIVATE
 
-      avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
-      avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
-      avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
+      avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -65,26 +65,26 @@ else()
 
    target_sources(opgen-gradient PRIVATE
 
-      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp
-      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
-      noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp
index c7b6e5e1..b124e41b 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp
@@ -144,7 +144,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply( const P1Function< real_t >&
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -165,6 +165,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply( const P1Function< real_t >&
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -243,7 +244,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix( const std::shared_ptr< S
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -265,6 +266,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix( const std::shared_ptr< S
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp
index d822b1ad..5518ed90 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,57 +79,66 @@ class P1ToP2ElementwiseGradientAnnulusMap_0_0 : public Operator< P1Function< rea
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradientAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    220     330      17      12      3              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                                real_t* RESTRICT _data_dstVertex,
+                                                                real_t* RESTRICT _data_src,
+                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                int64_t          micro_edges_per_macro_edge,
+                                                                real_t           micro_edges_per_macro_edge_float,
+                                                                real_t           radRayVertex,
+                                                                real_t           radRefVertex,
+                                                                real_t           rayVertex_0,
+                                                                real_t           rayVertex_1,
+                                                                real_t           refVertex_0,
+                                                                real_t           refVertex_1,
+                                                                real_t           thrVertex_0,
+                                                                real_t           thrVertex_1 ) const;
+
+   /// Integral: P1ToP2ElementwiseGradientAnnulusMap_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    202     312      17      12      3              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge,
+                                                                   idx_t* RESTRICT _data_dstVertex,
+                                                                   idx_t* RESTRICT _data_src,
+                                                                   real_t          macro_vertex_coord_id_0comp0,
+                                                                   real_t          macro_vertex_coord_id_0comp1,
+                                                                   real_t          macro_vertex_coord_id_1comp0,
+                                                                   real_t          macro_vertex_coord_id_1comp1,
+                                                                   real_t          macro_vertex_coord_id_2comp0,
+                                                                   real_t          macro_vertex_coord_id_2comp1,
+                                                                   std::shared_ptr< SparseMatrixProxy > mat,
+                                                                   int64_t micro_edges_per_macro_edge,
+                                                                   real_t  micro_edges_per_macro_edge_float,
+                                                                   real_t  radRayVertex,
+                                                                   real_t  radRefVertex,
+                                                                   real_t  rayVertex_0,
+                                                                   real_t  rayVertex_1,
+                                                                   real_t  refVertex_0,
+                                                                   real_t  refVertex_1,
+                                                                   real_t  thrVertex_0,
+                                                                   real_t  thrVertex_1 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp
index e1ee2e39..08305061 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp
@@ -144,7 +144,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply( const P1Function< real_t >&
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -165,6 +165,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply( const P1Function< real_t >&
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -243,7 +244,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix( const std::shared_ptr< S
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -265,6 +266,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix( const std::shared_ptr< S
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp
index 1c03c47c..eeb840ed 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,57 +79,66 @@ class P1ToP2ElementwiseGradientAnnulusMap_1_0 : public Operator< P1Function< rea
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradientAnnulusMap_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    220     330      17      12      3              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                                real_t* RESTRICT _data_dstVertex,
+                                                                real_t* RESTRICT _data_src,
+                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                int64_t          micro_edges_per_macro_edge,
+                                                                real_t           micro_edges_per_macro_edge_float,
+                                                                real_t           radRayVertex,
+                                                                real_t           radRefVertex,
+                                                                real_t           rayVertex_0,
+                                                                real_t           rayVertex_1,
+                                                                real_t           refVertex_0,
+                                                                real_t           refVertex_1,
+                                                                real_t           thrVertex_0,
+                                                                real_t           thrVertex_1 ) const;
+
+   /// Integral: P1ToP2ElementwiseGradientAnnulusMap_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    202     312      17      12      3              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge,
+                                                                   idx_t* RESTRICT _data_dstVertex,
+                                                                   idx_t* RESTRICT _data_src,
+                                                                   real_t          macro_vertex_coord_id_0comp0,
+                                                                   real_t          macro_vertex_coord_id_0comp1,
+                                                                   real_t          macro_vertex_coord_id_1comp0,
+                                                                   real_t          macro_vertex_coord_id_1comp1,
+                                                                   real_t          macro_vertex_coord_id_2comp0,
+                                                                   real_t          macro_vertex_coord_id_2comp1,
+                                                                   std::shared_ptr< SparseMatrixProxy > mat,
+                                                                   int64_t micro_edges_per_macro_edge,
+                                                                   real_t  micro_edges_per_macro_edge_float,
+                                                                   real_t  radRayVertex,
+                                                                   real_t  radRefVertex,
+                                                                   real_t  rayVertex_0,
+                                                                   real_t  rayVertex_1,
+                                                                   real_t  refVertex_0,
+                                                                   real_t  refVertex_1,
+                                                                   real_t  thrVertex_0,
+                                                                   real_t  thrVertex_1 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp
index 53aed4cf..d4d125c3 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp
@@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply( const P1Function<
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply( const P1Function<
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix( const std::shar
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix( const std::shar
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp
index 78819b72..16490ad8 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_0_0 : public Operator< P1Func
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    540     783      46       4      4              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                         real_t* RESTRICT _data_dstVertex,
+                                                                         real_t* RESTRICT _data_src,
+                                                                         real_t           forVertex_0,
+                                                                         real_t           forVertex_1,
+                                                                         real_t           forVertex_2,
+                                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                                         real_t           macro_vertex_coord_id_0comp2,
+                                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                                         real_t           macro_vertex_coord_id_1comp2,
+                                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                                         real_t           macro_vertex_coord_id_2comp2,
+                                                                         real_t           macro_vertex_coord_id_3comp0,
+                                                                         real_t           macro_vertex_coord_id_3comp1,
+                                                                         real_t           macro_vertex_coord_id_3comp2,
+                                                                         int64_t          micro_edges_per_macro_edge,
+                                                                         real_t           micro_edges_per_macro_edge_float,
+                                                                         real_t           radRayVertex,
+                                                                         real_t           radRefVertex,
+                                                                         real_t           rayVertex_0,
+                                                                         real_t           rayVertex_1,
+                                                                         real_t           rayVertex_2,
+                                                                         real_t           refVertex_0,
+                                                                         real_t           refVertex_1,
+                                                                         real_t           refVertex_2,
+                                                                         real_t           thrVertex_0,
+                                                                         real_t           thrVertex_1,
+                                                                         real_t           thrVertex_2 ) const;
+
+   /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    500     743      46       4      4              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge,
+                                                                            idx_t* RESTRICT _data_dstVertex,
+                                                                            idx_t* RESTRICT _data_src,
+                                                                            real_t          forVertex_0,
+                                                                            real_t          forVertex_1,
+                                                                            real_t          forVertex_2,
+                                                                            real_t          macro_vertex_coord_id_0comp0,
+                                                                            real_t          macro_vertex_coord_id_0comp1,
+                                                                            real_t          macro_vertex_coord_id_0comp2,
+                                                                            real_t          macro_vertex_coord_id_1comp0,
+                                                                            real_t          macro_vertex_coord_id_1comp1,
+                                                                            real_t          macro_vertex_coord_id_1comp2,
+                                                                            real_t          macro_vertex_coord_id_2comp0,
+                                                                            real_t          macro_vertex_coord_id_2comp1,
+                                                                            real_t          macro_vertex_coord_id_2comp2,
+                                                                            real_t          macro_vertex_coord_id_3comp0,
+                                                                            real_t          macro_vertex_coord_id_3comp1,
+                                                                            real_t          macro_vertex_coord_id_3comp2,
+                                                                            std::shared_ptr< SparseMatrixProxy > mat,
+                                                                            int64_t micro_edges_per_macro_edge,
+                                                                            real_t  micro_edges_per_macro_edge_float,
+                                                                            real_t  radRayVertex,
+                                                                            real_t  radRefVertex,
+                                                                            real_t  rayVertex_0,
+                                                                            real_t  rayVertex_1,
+                                                                            real_t  rayVertex_2,
+                                                                            real_t  refVertex_0,
+                                                                            real_t  refVertex_1,
+                                                                            real_t  refVertex_2,
+                                                                            real_t  thrVertex_0,
+                                                                            real_t  thrVertex_1,
+                                                                            real_t  thrVertex_2 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp
index 54ec309d..37126655 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp
@@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply( const P1Function<
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply( const P1Function<
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix( const std::shar
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix( const std::shar
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp
index 561a431e..7be898ba 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_1_0 : public Operator< P1Func
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    540     783      46       4      4              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                         real_t* RESTRICT _data_dstVertex,
+                                                                         real_t* RESTRICT _data_src,
+                                                                         real_t           forVertex_0,
+                                                                         real_t           forVertex_1,
+                                                                         real_t           forVertex_2,
+                                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                                         real_t           macro_vertex_coord_id_0comp2,
+                                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                                         real_t           macro_vertex_coord_id_1comp2,
+                                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                                         real_t           macro_vertex_coord_id_2comp2,
+                                                                         real_t           macro_vertex_coord_id_3comp0,
+                                                                         real_t           macro_vertex_coord_id_3comp1,
+                                                                         real_t           macro_vertex_coord_id_3comp2,
+                                                                         int64_t          micro_edges_per_macro_edge,
+                                                                         real_t           micro_edges_per_macro_edge_float,
+                                                                         real_t           radRayVertex,
+                                                                         real_t           radRefVertex,
+                                                                         real_t           rayVertex_0,
+                                                                         real_t           rayVertex_1,
+                                                                         real_t           rayVertex_2,
+                                                                         real_t           refVertex_0,
+                                                                         real_t           refVertex_1,
+                                                                         real_t           refVertex_2,
+                                                                         real_t           thrVertex_0,
+                                                                         real_t           thrVertex_1,
+                                                                         real_t           thrVertex_2 ) const;
+
+   /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    500     743      46       4      4              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge,
+                                                                            idx_t* RESTRICT _data_dstVertex,
+                                                                            idx_t* RESTRICT _data_src,
+                                                                            real_t          forVertex_0,
+                                                                            real_t          forVertex_1,
+                                                                            real_t          forVertex_2,
+                                                                            real_t          macro_vertex_coord_id_0comp0,
+                                                                            real_t          macro_vertex_coord_id_0comp1,
+                                                                            real_t          macro_vertex_coord_id_0comp2,
+                                                                            real_t          macro_vertex_coord_id_1comp0,
+                                                                            real_t          macro_vertex_coord_id_1comp1,
+                                                                            real_t          macro_vertex_coord_id_1comp2,
+                                                                            real_t          macro_vertex_coord_id_2comp0,
+                                                                            real_t          macro_vertex_coord_id_2comp1,
+                                                                            real_t          macro_vertex_coord_id_2comp2,
+                                                                            real_t          macro_vertex_coord_id_3comp0,
+                                                                            real_t          macro_vertex_coord_id_3comp1,
+                                                                            real_t          macro_vertex_coord_id_3comp2,
+                                                                            std::shared_ptr< SparseMatrixProxy > mat,
+                                                                            int64_t micro_edges_per_macro_edge,
+                                                                            real_t  micro_edges_per_macro_edge_float,
+                                                                            real_t  radRayVertex,
+                                                                            real_t  radRefVertex,
+                                                                            real_t  rayVertex_0,
+                                                                            real_t  rayVertex_1,
+                                                                            real_t  rayVertex_2,
+                                                                            real_t  refVertex_0,
+                                                                            real_t  refVertex_1,
+                                                                            real_t  refVertex_2,
+                                                                            real_t  thrVertex_0,
+                                                                            real_t  thrVertex_1,
+                                                                            real_t  thrVertex_2 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp
index 095574db..16dad771 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp
@@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply( const P1Function<
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply( const P1Function<
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix( const std::shar
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix( const std::shar
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp
index 10e81880..6d0336ae 100644
--- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_2_0 : public Operator< P1Func
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    540     783      46       4      4              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                         real_t* RESTRICT _data_dstVertex,
+                                                                         real_t* RESTRICT _data_src,
+                                                                         real_t           forVertex_0,
+                                                                         real_t           forVertex_1,
+                                                                         real_t           forVertex_2,
+                                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                                         real_t           macro_vertex_coord_id_0comp2,
+                                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                                         real_t           macro_vertex_coord_id_1comp2,
+                                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                                         real_t           macro_vertex_coord_id_2comp2,
+                                                                         real_t           macro_vertex_coord_id_3comp0,
+                                                                         real_t           macro_vertex_coord_id_3comp1,
+                                                                         real_t           macro_vertex_coord_id_3comp2,
+                                                                         int64_t          micro_edges_per_macro_edge,
+                                                                         real_t           micro_edges_per_macro_edge_float,
+                                                                         real_t           radRayVertex,
+                                                                         real_t           radRefVertex,
+                                                                         real_t           rayVertex_0,
+                                                                         real_t           rayVertex_1,
+                                                                         real_t           rayVertex_2,
+                                                                         real_t           refVertex_0,
+                                                                         real_t           refVertex_1,
+                                                                         real_t           refVertex_2,
+                                                                         real_t           thrVertex_0,
+                                                                         real_t           thrVertex_1,
+                                                                         real_t           thrVertex_2 ) const;
+
+   /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    500     743      46       4      4              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge,
+                                                                            idx_t* RESTRICT _data_dstVertex,
+                                                                            idx_t* RESTRICT _data_src,
+                                                                            real_t          forVertex_0,
+                                                                            real_t          forVertex_1,
+                                                                            real_t          forVertex_2,
+                                                                            real_t          macro_vertex_coord_id_0comp0,
+                                                                            real_t          macro_vertex_coord_id_0comp1,
+                                                                            real_t          macro_vertex_coord_id_0comp2,
+                                                                            real_t          macro_vertex_coord_id_1comp0,
+                                                                            real_t          macro_vertex_coord_id_1comp1,
+                                                                            real_t          macro_vertex_coord_id_1comp2,
+                                                                            real_t          macro_vertex_coord_id_2comp0,
+                                                                            real_t          macro_vertex_coord_id_2comp1,
+                                                                            real_t          macro_vertex_coord_id_2comp2,
+                                                                            real_t          macro_vertex_coord_id_3comp0,
+                                                                            real_t          macro_vertex_coord_id_3comp1,
+                                                                            real_t          macro_vertex_coord_id_3comp2,
+                                                                            std::shared_ptr< SparseMatrixProxy > mat,
+                                                                            int64_t micro_edges_per_macro_edge,
+                                                                            real_t  micro_edges_per_macro_edge_float,
+                                                                            real_t  radRayVertex,
+                                                                            real_t  radRefVertex,
+                                                                            real_t  rayVertex_0,
+                                                                            real_t  rayVertex_1,
+                                                                            real_t  rayVertex_2,
+                                                                            real_t  refVertex_0,
+                                                                            real_t  refVertex_1,
+                                                                            real_t  refVertex_2,
+                                                                            real_t  thrVertex_0,
+                                                                            real_t  thrVertex_1,
+                                                                            real_t  thrVertex_2 ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp
index afce6952..481eb1dd 100644
--- a/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp
@@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ToP2ElementwiseGradient_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -213,7 +214,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ToP2ElementwiseGradient_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -226,6 +227,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -291,7 +293,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -311,6 +313,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -340,7 +343,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -354,6 +357,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp
index b7dee5e8..f0c11fa6 100644
--- a/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -74,88 +76,107 @@ class P1ToP2ElementwiseGradient_0_0 : public Operator< P1Function< real_t >, P2F
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradient_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    116     144      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                      real_t* RESTRICT _data_dstVertex,
+                                                      real_t* RESTRICT _data_src,
+                                                      real_t           macro_vertex_coord_id_0comp0,
+                                                      real_t           macro_vertex_coord_id_0comp1,
+                                                      real_t           macro_vertex_coord_id_1comp0,
+                                                      real_t           macro_vertex_coord_id_1comp1,
+                                                      real_t           macro_vertex_coord_id_2comp0,
+                                                      real_t           macro_vertex_coord_id_2comp1,
+                                                      int64_t          micro_edges_per_macro_edge,
+                                                      real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ToP2ElementwiseGradient_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    345     380      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                      real_t* RESTRICT _data_dstVertex,
+                                                      real_t* RESTRICT _data_src,
+                                                      real_t           macro_vertex_coord_id_0comp0,
+                                                      real_t           macro_vertex_coord_id_0comp1,
+                                                      real_t           macro_vertex_coord_id_0comp2,
+                                                      real_t           macro_vertex_coord_id_1comp0,
+                                                      real_t           macro_vertex_coord_id_1comp1,
+                                                      real_t           macro_vertex_coord_id_1comp2,
+                                                      real_t           macro_vertex_coord_id_2comp0,
+                                                      real_t           macro_vertex_coord_id_2comp1,
+                                                      real_t           macro_vertex_coord_id_2comp2,
+                                                      real_t           macro_vertex_coord_id_3comp0,
+                                                      real_t           macro_vertex_coord_id_3comp1,
+                                                      real_t           macro_vertex_coord_id_3comp2,
+                                                      int64_t          micro_edges_per_macro_edge,
+                                                      real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ToP2ElementwiseGradient_0_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     98     126      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                         idx_t* RESTRICT                      _data_dstVertex,
+                                                         idx_t* RESTRICT                      _data_src,
+                                                         real_t                               macro_vertex_coord_id_0comp0,
+                                                         real_t                               macro_vertex_coord_id_0comp1,
+                                                         real_t                               macro_vertex_coord_id_1comp0,
+                                                         real_t                               macro_vertex_coord_id_1comp1,
+                                                         real_t                               macro_vertex_coord_id_2comp0,
+                                                         real_t                               macro_vertex_coord_id_2comp1,
+                                                         std::shared_ptr< SparseMatrixProxy > mat,
+                                                         int64_t                              micro_edges_per_macro_edge,
+                                                         real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ToP2ElementwiseGradient_0_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    305     340      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                         idx_t* RESTRICT                      _data_dstVertex,
+                                                         idx_t* RESTRICT                      _data_src,
+                                                         real_t                               macro_vertex_coord_id_0comp0,
+                                                         real_t                               macro_vertex_coord_id_0comp1,
+                                                         real_t                               macro_vertex_coord_id_0comp2,
+                                                         real_t                               macro_vertex_coord_id_1comp0,
+                                                         real_t                               macro_vertex_coord_id_1comp1,
+                                                         real_t                               macro_vertex_coord_id_1comp2,
+                                                         real_t                               macro_vertex_coord_id_2comp0,
+                                                         real_t                               macro_vertex_coord_id_2comp1,
+                                                         real_t                               macro_vertex_coord_id_2comp2,
+                                                         real_t                               macro_vertex_coord_id_3comp0,
+                                                         real_t                               macro_vertex_coord_id_3comp1,
+                                                         real_t                               macro_vertex_coord_id_3comp2,
+                                                         std::shared_ptr< SparseMatrixProxy > mat,
+                                                         int64_t                              micro_edges_per_macro_edge,
+                                                         real_t micro_edges_per_macro_edge_float ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp
index 9f65cb11..c60d6176 100644
--- a/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp
@@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ToP2ElementwiseGradient_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -213,7 +214,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ToP2ElementwiseGradient_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -226,6 +227,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -291,7 +293,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -311,6 +313,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -340,7 +343,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -354,6 +357,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp
index ee0af312..8f976754 100644
--- a/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -74,88 +76,107 @@ class P1ToP2ElementwiseGradient_1_0 : public Operator< P1Function< real_t >, P2F
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradient_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    116     144      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                      real_t* RESTRICT _data_dstVertex,
+                                                      real_t* RESTRICT _data_src,
+                                                      real_t           macro_vertex_coord_id_0comp0,
+                                                      real_t           macro_vertex_coord_id_0comp1,
+                                                      real_t           macro_vertex_coord_id_1comp0,
+                                                      real_t           macro_vertex_coord_id_1comp1,
+                                                      real_t           macro_vertex_coord_id_2comp0,
+                                                      real_t           macro_vertex_coord_id_2comp1,
+                                                      int64_t          micro_edges_per_macro_edge,
+                                                      real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ToP2ElementwiseGradient_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    345     380      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                      real_t* RESTRICT _data_dstVertex,
+                                                      real_t* RESTRICT _data_src,
+                                                      real_t           macro_vertex_coord_id_0comp0,
+                                                      real_t           macro_vertex_coord_id_0comp1,
+                                                      real_t           macro_vertex_coord_id_0comp2,
+                                                      real_t           macro_vertex_coord_id_1comp0,
+                                                      real_t           macro_vertex_coord_id_1comp1,
+                                                      real_t           macro_vertex_coord_id_1comp2,
+                                                      real_t           macro_vertex_coord_id_2comp0,
+                                                      real_t           macro_vertex_coord_id_2comp1,
+                                                      real_t           macro_vertex_coord_id_2comp2,
+                                                      real_t           macro_vertex_coord_id_3comp0,
+                                                      real_t           macro_vertex_coord_id_3comp1,
+                                                      real_t           macro_vertex_coord_id_3comp2,
+                                                      int64_t          micro_edges_per_macro_edge,
+                                                      real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ToP2ElementwiseGradient_1_0
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     98     126      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                         idx_t* RESTRICT                      _data_dstVertex,
+                                                         idx_t* RESTRICT                      _data_src,
+                                                         real_t                               macro_vertex_coord_id_0comp0,
+                                                         real_t                               macro_vertex_coord_id_0comp1,
+                                                         real_t                               macro_vertex_coord_id_1comp0,
+                                                         real_t                               macro_vertex_coord_id_1comp1,
+                                                         real_t                               macro_vertex_coord_id_2comp0,
+                                                         real_t                               macro_vertex_coord_id_2comp1,
+                                                         std::shared_ptr< SparseMatrixProxy > mat,
+                                                         int64_t                              micro_edges_per_macro_edge,
+                                                         real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ToP2ElementwiseGradient_1_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    305     340      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                         idx_t* RESTRICT                      _data_dstVertex,
+                                                         idx_t* RESTRICT                      _data_src,
+                                                         real_t                               macro_vertex_coord_id_0comp0,
+                                                         real_t                               macro_vertex_coord_id_0comp1,
+                                                         real_t                               macro_vertex_coord_id_0comp2,
+                                                         real_t                               macro_vertex_coord_id_1comp0,
+                                                         real_t                               macro_vertex_coord_id_1comp1,
+                                                         real_t                               macro_vertex_coord_id_1comp2,
+                                                         real_t                               macro_vertex_coord_id_2comp0,
+                                                         real_t                               macro_vertex_coord_id_2comp1,
+                                                         real_t                               macro_vertex_coord_id_2comp2,
+                                                         real_t                               macro_vertex_coord_id_3comp0,
+                                                         real_t                               macro_vertex_coord_id_3comp1,
+                                                         real_t                               macro_vertex_coord_id_3comp2,
+                                                         std::shared_ptr< SparseMatrixProxy > mat,
+                                                         int64_t                              micro_edges_per_macro_edge,
+                                                         real_t micro_edges_per_macro_edge_float ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp
index ffa952cc..80d01f0c 100644
--- a/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp
+++ b/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp
@@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_2_0::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ToP2ElementwiseGradient_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_2_0::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -220,7 +221,7 @@ void P1ToP2ElementwiseGradient_2_0::toMatrix( const std::shared_ptr< SparseMatri
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -240,6 +241,7 @@ void P1ToP2ElementwiseGradient_2_0::toMatrix( const std::shared_ptr< SparseMatri
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp
index 7f518871..810b24c2 100644
--- a/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp
+++ b/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -74,53 +76,62 @@ class P1ToP2ElementwiseGradient_2_0 : public Operator< P1Function< real_t >, P2F
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ToP2ElementwiseGradient_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    345     380      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                      real_t* RESTRICT _data_dstVertex,
+                                                      real_t* RESTRICT _data_src,
+                                                      real_t           macro_vertex_coord_id_0comp0,
+                                                      real_t           macro_vertex_coord_id_0comp1,
+                                                      real_t           macro_vertex_coord_id_0comp2,
+                                                      real_t           macro_vertex_coord_id_1comp0,
+                                                      real_t           macro_vertex_coord_id_1comp1,
+                                                      real_t           macro_vertex_coord_id_1comp2,
+                                                      real_t           macro_vertex_coord_id_2comp0,
+                                                      real_t           macro_vertex_coord_id_2comp1,
+                                                      real_t           macro_vertex_coord_id_2comp2,
+                                                      real_t           macro_vertex_coord_id_3comp0,
+                                                      real_t           macro_vertex_coord_id_3comp1,
+                                                      real_t           macro_vertex_coord_id_3comp2,
+                                                      int64_t          micro_edges_per_macro_edge,
+                                                      real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ToP2ElementwiseGradient_2_0
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   CUBES
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    305     340      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                         idx_t* RESTRICT                      _data_dstVertex,
+                                                         idx_t* RESTRICT                      _data_src,
+                                                         real_t                               macro_vertex_coord_id_0comp0,
+                                                         real_t                               macro_vertex_coord_id_0comp1,
+                                                         real_t                               macro_vertex_coord_id_0comp2,
+                                                         real_t                               macro_vertex_coord_id_1comp0,
+                                                         real_t                               macro_vertex_coord_id_1comp1,
+                                                         real_t                               macro_vertex_coord_id_1comp2,
+                                                         real_t                               macro_vertex_coord_id_2comp0,
+                                                         real_t                               macro_vertex_coord_id_2comp1,
+                                                         real_t                               macro_vertex_coord_id_2comp2,
+                                                         real_t                               macro_vertex_coord_id_3comp0,
+                                                         real_t                               macro_vertex_coord_id_3comp1,
+                                                         real_t                               macro_vertex_coord_id_3comp2,
+                                                         std::shared_ptr< SparseMatrixProxy > mat,
+                                                         int64_t                              micro_edges_per_macro_edge,
+                                                         real_t micro_edges_per_macro_edge_float ) const;
 };
 
 } // namespace operatorgeneration
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
index 56fce1cf..68d512a9 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
index 47d6b55f..ac6c2dca 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
index 657796e4..f08adf58 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
index d94aa200..2b92e6b7 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
index 808dafe1..f0e17ec3 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
index 2cea1726..0b4dbee5 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
index 38b98cf7..25e43913 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
index e446fff4..1eb2891d 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
index 3225debc..cef4dfa4 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
rename to operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
index 25c56d96..042040bb 100644
--- a/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_2_0::apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
index b4312145..f935d21a 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
index 28bd7239..9588c7cd 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
index 60b23b46..6288dadc 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
index ce869eef..9d12e824 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
index d61d2c99..2eab9008 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
index 41e1786a..254485aa 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
index 14ec97f8..5b3a5aa4 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
index 1cc81709..3b0b1eb0 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
index d45cc629..a0a11ee9 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
index 980a97ef..3f146ad5 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
index d71bcbed..f2b2c2e1 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
index c67eeed4..1c9b0439 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_0_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
index f2d8391d..e5020c45 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_0_0::toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
index 068e344b..32469538 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_0_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_0_0::toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
index 0589a1df..1049fd8b 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
index 410c95a8..83284af5 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_1_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
similarity index 98%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
index ce8b5c70..e934ca93 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_1_0::toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
index 657f8d54..ba0b7ad9 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_1_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_1_0::toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
index ea2712d2..4f0c7b74 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_2_0::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_2_0::apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
similarity index 99%
rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp
rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
index 658ef807..cba14647 100644
--- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ToP2ElementwiseGradient_2_0::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ToP2ElementwiseGradient_2_0::toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
diff --git a/operators/k_mass/CMakeLists.txt b/operators/k_mass/CMakeLists.txt
index 71f23f38..51d608a7 100644
--- a/operators/k_mass/CMakeLists.txt
+++ b/operators/k_mass/CMakeLists.txt
@@ -23,62 +23,62 @@ add_library( opgen-k_mass
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-k_mass PRIVATE
 
-      avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P1ElementwiseKMass_apply_macro_2D.cpp
-      avx/P1ElementwiseKMass_apply_macro_3D.cpp
-      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseKMass_apply_macro_2D.cpp
-      avx/P2ElementwiseKMass_apply_macro_3D.cpp
-      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp
-      noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp
+      avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
+      avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
+      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
+      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
+      avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
+      avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
+      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
+      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
+      avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
+      avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
+      noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp
+      noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp
+      noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp
+      noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp
+      noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp
+      noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P1ElementwiseKMass_apply_macro_2D.cpp
-      avx/P1ElementwiseKMass_apply_macro_3D.cpp
-      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseKMass_apply_macro_2D.cpp
-      avx/P2ElementwiseKMass_apply_macro_3D.cpp
-      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp
-      avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp
+      avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
+      avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
+      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
+      avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
+      avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
+      avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
+      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
+      avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
+      avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
+      avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -89,38 +89,38 @@ else()
 
    target_sources(opgen-k_mass PRIVATE
 
-      noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P1ElementwiseKMass_apply_macro_2D.cpp
-      noarch/P1ElementwiseKMass_apply_macro_3D.cpp
-      noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseKMass_apply_macro_2D.cpp
-      noarch/P2ElementwiseKMass_apply_macro_3D.cpp
-      noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
-      noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp
-      noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp
-      noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp
-      noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp
+      noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
+      noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
+      noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
+      noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
+      noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp
+      noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp
+      noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
+      noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
+      noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
+      noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
+      noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp
+      noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp
+      noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
+      noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
+      noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
+      noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp
+      noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/k_mass/P1ElementwiseKMass.cpp b/operators/k_mass/P1ElementwiseKMass.cpp
index 03b70d41..a77a51b2 100644
--- a/operators/k_mass/P1ElementwiseKMass.cpp
+++ b/operators/k_mass/P1ElementwiseKMass.cpp
@@ -133,7 +133,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ElementwiseKMass_macro_3D(
 
              _data_dst,
              _data_k,
@@ -152,6 +152,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -200,7 +201,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ElementwiseKMass_macro_2D(
 
              _data_dst,
              _data_k,
@@ -213,6 +214,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -276,7 +278,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ElementwiseKMass_macro_3D(
 
              _data_dst,
              _data_k,
@@ -296,6 +298,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -325,7 +328,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ElementwiseKMass_macro_2D(
 
              _data_dst,
              _data_k,
@@ -339,6 +342,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -390,7 +394,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D(
 
                 _data_invDiag_,
                 _data_k,
@@ -408,6 +412,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -447,7 +452,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D(
 
                 _data_invDiag_,
                 _data_k,
@@ -459,6 +464,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/k_mass/P1ElementwiseKMass.hpp b/operators/k_mass/P1ElementwiseKMass.hpp
index c79a7498..9252db2e 100644
--- a/operators/k_mass/P1ElementwiseKMass.hpp
+++ b/operators/k_mass/P1ElementwiseKMass.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,126 +84,155 @@ class P1ElementwiseKMass : public Operator< P1Function< real_t >, P1Function< re
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     81      73      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_k,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dst,
+                                           real_t* RESTRICT _data_k,
+                                           real_t* RESTRICT _data_src,
+                                           real_t           macro_vertex_coord_id_0comp0,
+                                           real_t           macro_vertex_coord_id_0comp1,
+                                           real_t           macro_vertex_coord_id_1comp0,
+                                           real_t           macro_vertex_coord_id_1comp1,
+                                           real_t           macro_vertex_coord_id_2comp0,
+                                           real_t           macro_vertex_coord_id_2comp1,
+                                           int64_t          micro_edges_per_macro_edge,
+                                           real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    189     163      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_k,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dst,
+                                           real_t* RESTRICT _data_k,
+                                           real_t* RESTRICT _data_src,
+                                           real_t           macro_vertex_coord_id_0comp0,
+                                           real_t           macro_vertex_coord_id_0comp1,
+                                           real_t           macro_vertex_coord_id_0comp2,
+                                           real_t           macro_vertex_coord_id_1comp0,
+                                           real_t           macro_vertex_coord_id_1comp1,
+                                           real_t           macro_vertex_coord_id_1comp2,
+                                           real_t           macro_vertex_coord_id_2comp0,
+                                           real_t           macro_vertex_coord_id_2comp1,
+                                           real_t           macro_vertex_coord_id_2comp2,
+                                           real_t           macro_vertex_coord_id_3comp0,
+                                           real_t           macro_vertex_coord_id_3comp1,
+                                           real_t           macro_vertex_coord_id_3comp2,
+                                           int64_t          micro_edges_per_macro_edge,
+                                           real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     72      64      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_k,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P1ElementwiseKMass_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                              real_t* RESTRICT                     _data_k,
+                                              idx_t* RESTRICT                      _data_src,
+                                              real_t                               macro_vertex_coord_id_0comp0,
+                                              real_t                               macro_vertex_coord_id_0comp1,
+                                              real_t                               macro_vertex_coord_id_1comp0,
+                                              real_t                               macro_vertex_coord_id_1comp1,
+                                              real_t                               macro_vertex_coord_id_2comp0,
+                                              real_t                               macro_vertex_coord_id_2comp1,
+                                              std::shared_ptr< SparseMatrixProxy > mat,
+                                              int64_t                              micro_edges_per_macro_edge,
+                                              real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    173     147      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_k,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P1ElementwiseKMass_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                              real_t* RESTRICT                     _data_k,
+                                              idx_t* RESTRICT                      _data_src,
+                                              real_t                               macro_vertex_coord_id_0comp0,
+                                              real_t                               macro_vertex_coord_id_0comp1,
+                                              real_t                               macro_vertex_coord_id_0comp2,
+                                              real_t                               macro_vertex_coord_id_1comp0,
+                                              real_t                               macro_vertex_coord_id_1comp1,
+                                              real_t                               macro_vertex_coord_id_1comp2,
+                                              real_t                               macro_vertex_coord_id_2comp0,
+                                              real_t                               macro_vertex_coord_id_2comp1,
+                                              real_t                               macro_vertex_coord_id_2comp2,
+                                              real_t                               macro_vertex_coord_id_3comp0,
+                                              real_t                               macro_vertex_coord_id_3comp1,
+                                              real_t                               macro_vertex_coord_id_3comp2,
+                                              std::shared_ptr< SparseMatrixProxy > mat,
+                                              int64_t                              micro_edges_per_macro_edge,
+                                              real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     63      52      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t* RESTRICT _data_k,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_invDiag_,
+                                                                          real_t* RESTRICT _data_k,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          int64_t          micro_edges_per_macro_edge,
+                                                                          real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    147     117      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t* RESTRICT _data_k,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_invDiag_,
+                                                                          real_t* RESTRICT _data_k,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          int64_t          micro_edges_per_macro_edge,
+                                                                          real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P1Function< real_t > > invDiag_;
    P1Function< real_t >                    k;
diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp
index 68c62d7d..28e01722 100644
--- a/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp
+++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp
@@ -135,7 +135,7 @@ void P1ElementwiseKMassAnnulusMap::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ElementwiseKMassAnnulusMap_macro_2D(
 
              _data_dst,
              _data_k,
@@ -156,6 +156,7 @@ void P1ElementwiseKMassAnnulusMap::apply( const P1Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -232,7 +233,7 @@ void P1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D(
 
              _data_dst,
              _data_k,
@@ -254,6 +255,7 @@ void P1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -319,7 +321,7 @@ void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D(
 
                 _data_invDiag_,
                 _data_k,
@@ -339,6 +341,7 @@ void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp
index e8412918..bf249e60 100644
--- a/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp
+++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,81 +85,95 @@ class P1ElementwiseKMassAnnulusMap : public Operator< P1Function< real_t >, P1Fu
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    333     513      24      20      4              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_k,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dst,
+                                                     real_t* RESTRICT _data_k,
+                                                     real_t* RESTRICT _data_src,
+                                                     real_t           macro_vertex_coord_id_0comp0,
+                                                     real_t           macro_vertex_coord_id_0comp1,
+                                                     real_t           macro_vertex_coord_id_1comp0,
+                                                     real_t           macro_vertex_coord_id_1comp1,
+                                                     real_t           macro_vertex_coord_id_2comp0,
+                                                     real_t           macro_vertex_coord_id_2comp1,
+                                                     int64_t          micro_edges_per_macro_edge,
+                                                     real_t           micro_edges_per_macro_edge_float,
+                                                     real_t           radRayVertex,
+                                                     real_t           radRefVertex,
+                                                     real_t           rayVertex_0,
+                                                     real_t           rayVertex_1,
+                                                     real_t           refVertex_0,
+                                                     real_t           refVertex_1,
+                                                     real_t           thrVertex_0,
+                                                     real_t           thrVertex_1 ) const;
+
+   /// Integral: P1ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    324     504      24      20      4              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_k,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                                        real_t* RESTRICT                     _data_k,
+                                                        idx_t* RESTRICT                      _data_src,
+                                                        real_t                               macro_vertex_coord_id_0comp0,
+                                                        real_t                               macro_vertex_coord_id_0comp1,
+                                                        real_t                               macro_vertex_coord_id_1comp0,
+                                                        real_t                               macro_vertex_coord_id_1comp1,
+                                                        real_t                               macro_vertex_coord_id_2comp0,
+                                                        real_t                               macro_vertex_coord_id_2comp1,
+                                                        std::shared_ptr< SparseMatrixProxy > mat,
+                                                        int64_t                              micro_edges_per_macro_edge,
+                                                        real_t                               micro_edges_per_macro_edge_float,
+                                                        real_t                               radRayVertex,
+                                                        real_t                               radRefVertex,
+                                                        real_t                               rayVertex_0,
+                                                        real_t                               rayVertex_1,
+                                                        real_t                               refVertex_0,
+                                                        real_t                               refVertex_1,
+                                                        real_t                               thrVertex_0,
+                                                        real_t                               thrVertex_1 ) const;
+
+   /// Integral: P1ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    315     492      24      20      4              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t* RESTRICT _data_k,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_,
+                                                                                    real_t* RESTRICT _data_k,
+                                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                                    real_t micro_edges_per_macro_edge_float,
+                                                                                    real_t radRayVertex,
+                                                                                    real_t radRefVertex,
+                                                                                    real_t rayVertex_0,
+                                                                                    real_t rayVertex_1,
+                                                                                    real_t refVertex_0,
+                                                                                    real_t refVertex_1,
+                                                                                    real_t thrVertex_0,
+                                                                                    real_t thrVertex_1 ) const;
 
    std::shared_ptr< P1Function< real_t > > invDiag_;
    P1Function< real_t >                    k;
diff --git a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp
index d9b48d04..3eeda4e4 100644
--- a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp
+++ b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp
@@ -149,7 +149,7 @@ void P1ElementwiseKMassIcosahedralShellMap::apply( const P1Function< real_t >& s
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D(
 
              _data_dst,
              _data_k,
@@ -182,6 +182,7 @@ void P1ElementwiseKMassIcosahedralShellMap::apply( const P1Function< real_t >& s
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -267,7 +268,7 @@ void P1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D(
 
              _data_dst,
              _data_k,
@@ -301,6 +302,7 @@ void P1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -377,7 +379,7 @@ void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D(
 
                 _data_invDiag_,
                 _data_k,
@@ -409,6 +411,7 @@ void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp
index c501ad59..dead3823 100644
--- a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp
+++ b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,117 +85,132 @@ class P1ElementwiseKMassIcosahedralShellMap : public Operator< P1Function< real_
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    904    1543      51      15      5              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_k,
-                        real_t* RESTRICT _data_src,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst,
+                                                              real_t* RESTRICT _data_k,
+                                                              real_t* RESTRICT _data_src,
+                                                              real_t           forVertex_0,
+                                                              real_t           forVertex_1,
+                                                              real_t           forVertex_2,
+                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                              real_t           macro_vertex_coord_id_0comp2,
+                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                              real_t           macro_vertex_coord_id_1comp2,
+                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                              real_t           macro_vertex_coord_id_2comp2,
+                                                              real_t           macro_vertex_coord_id_3comp0,
+                                                              real_t           macro_vertex_coord_id_3comp1,
+                                                              real_t           macro_vertex_coord_id_3comp2,
+                                                              int64_t          micro_edges_per_macro_edge,
+                                                              real_t           micro_edges_per_macro_edge_float,
+                                                              real_t           radRayVertex,
+                                                              real_t           radRefVertex,
+                                                              real_t           rayVertex_0,
+                                                              real_t           rayVertex_1,
+                                                              real_t           rayVertex_2,
+                                                              real_t           refVertex_0,
+                                                              real_t           refVertex_1,
+                                                              real_t           refVertex_2,
+                                                              real_t           thrVertex_0,
+                                                              real_t           thrVertex_1,
+                                                              real_t           thrVertex_2 ) const;
+
+   /// Integral: P1ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    888    1527      51      15      5              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_k,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT  _data_dst,
+                                                                 real_t* RESTRICT _data_k,
+                                                                 idx_t* RESTRICT  _data_src,
+                                                                 real_t           forVertex_0,
+                                                                 real_t           forVertex_1,
+                                                                 real_t           forVertex_2,
+                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                                 int64_t                              micro_edges_per_macro_edge,
+                                                                 real_t micro_edges_per_macro_edge_float,
+                                                                 real_t radRayVertex,
+                                                                 real_t radRefVertex,
+                                                                 real_t rayVertex_0,
+                                                                 real_t rayVertex_1,
+                                                                 real_t rayVertex_2,
+                                                                 real_t refVertex_0,
+                                                                 real_t refVertex_1,
+                                                                 real_t refVertex_2,
+                                                                 real_t thrVertex_0,
+                                                                 real_t thrVertex_1,
+                                                                 real_t thrVertex_2 ) const;
+
+   /// Integral: P1ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    862    1497      51      15      5              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t* RESTRICT _data_k,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D(
+       real_t* RESTRICT _data_invDiag_,
+       real_t* RESTRICT _data_k,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P1Function< real_t > > invDiag_;
    P1Function< real_t >                    k;
diff --git a/operators/k_mass/P2ElementwiseKMass.cpp b/operators/k_mass/P2ElementwiseKMass.cpp
index dd4df2bc..46aa8ff2 100644
--- a/operators/k_mass/P2ElementwiseKMass.cpp
+++ b/operators/k_mass/P2ElementwiseKMass.cpp
@@ -137,7 +137,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseKMass_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -159,6 +159,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -228,7 +229,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseKMass_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -244,6 +245,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -314,7 +316,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseKMass_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -337,6 +339,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +372,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseKMass_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -386,6 +389,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -440,7 +444,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -460,6 +464,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -504,7 +509,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -518,6 +523,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/k_mass/P2ElementwiseKMass.hpp b/operators/k_mass/P2ElementwiseKMass.hpp
index 9de04491..89469870 100644
--- a/operators/k_mass/P2ElementwiseKMass.hpp
+++ b/operators/k_mass/P2ElementwiseKMass.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -82,142 +84,171 @@ class P2ElementwiseKMass : public Operator< P2Function< real_t >, P2Function< re
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    290     300      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                           real_t* RESTRICT _data_dstVertex,
+                                           real_t* RESTRICT _data_kEdge,
+                                           real_t* RESTRICT _data_kVertex,
+                                           real_t* RESTRICT _data_srcEdge,
+                                           real_t* RESTRICT _data_srcVertex,
+                                           real_t           macro_vertex_coord_id_0comp0,
+                                           real_t           macro_vertex_coord_id_0comp1,
+                                           real_t           macro_vertex_coord_id_1comp0,
+                                           real_t           macro_vertex_coord_id_1comp1,
+                                           real_t           macro_vertex_coord_id_2comp0,
+                                           real_t           macro_vertex_coord_id_2comp1,
+                                           int64_t          micro_edges_per_macro_edge,
+                                           real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1117    1118      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                           real_t* RESTRICT _data_dstVertex,
+                                           real_t* RESTRICT _data_kEdge,
+                                           real_t* RESTRICT _data_kVertex,
+                                           real_t* RESTRICT _data_srcEdge,
+                                           real_t* RESTRICT _data_srcVertex,
+                                           real_t           macro_vertex_coord_id_0comp0,
+                                           real_t           macro_vertex_coord_id_0comp1,
+                                           real_t           macro_vertex_coord_id_0comp2,
+                                           real_t           macro_vertex_coord_id_1comp0,
+                                           real_t           macro_vertex_coord_id_1comp1,
+                                           real_t           macro_vertex_coord_id_1comp2,
+                                           real_t           macro_vertex_coord_id_2comp0,
+                                           real_t           macro_vertex_coord_id_2comp1,
+                                           real_t           macro_vertex_coord_id_2comp2,
+                                           real_t           macro_vertex_coord_id_3comp0,
+                                           real_t           macro_vertex_coord_id_3comp1,
+                                           real_t           macro_vertex_coord_id_3comp2,
+                                           int64_t          micro_edges_per_macro_edge,
+                                           real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    254     264      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseKMass_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                              idx_t* RESTRICT                      _data_dstVertex,
+                                              real_t* RESTRICT                     _data_kEdge,
+                                              real_t* RESTRICT                     _data_kVertex,
+                                              idx_t* RESTRICT                      _data_srcEdge,
+                                              idx_t* RESTRICT                      _data_srcVertex,
+                                              real_t                               macro_vertex_coord_id_0comp0,
+                                              real_t                               macro_vertex_coord_id_0comp1,
+                                              real_t                               macro_vertex_coord_id_1comp0,
+                                              real_t                               macro_vertex_coord_id_1comp1,
+                                              real_t                               macro_vertex_coord_id_2comp0,
+                                              real_t                               macro_vertex_coord_id_2comp1,
+                                              std::shared_ptr< SparseMatrixProxy > mat,
+                                              int64_t                              micro_edges_per_macro_edge,
+                                              real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1017    1018      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseKMass_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                              idx_t* RESTRICT                      _data_dstVertex,
+                                              real_t* RESTRICT                     _data_kEdge,
+                                              real_t* RESTRICT                     _data_kVertex,
+                                              idx_t* RESTRICT                      _data_srcEdge,
+                                              idx_t* RESTRICT                      _data_srcVertex,
+                                              real_t                               macro_vertex_coord_id_0comp0,
+                                              real_t                               macro_vertex_coord_id_0comp1,
+                                              real_t                               macro_vertex_coord_id_0comp2,
+                                              real_t                               macro_vertex_coord_id_1comp0,
+                                              real_t                               macro_vertex_coord_id_1comp1,
+                                              real_t                               macro_vertex_coord_id_1comp2,
+                                              real_t                               macro_vertex_coord_id_2comp0,
+                                              real_t                               macro_vertex_coord_id_2comp1,
+                                              real_t                               macro_vertex_coord_id_2comp2,
+                                              real_t                               macro_vertex_coord_id_3comp0,
+                                              real_t                               macro_vertex_coord_id_3comp1,
+                                              real_t                               macro_vertex_coord_id_3comp2,
+                                              std::shared_ptr< SparseMatrixProxy > mat,
+                                              int64_t                              micro_edges_per_macro_edge,
+                                              real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    170     174      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                          real_t* RESTRICT _data_invDiag_Vertex,
+                                                                          real_t* RESTRICT _data_kEdge,
+                                                                          real_t* RESTRICT _data_kVertex,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          int64_t          micro_edges_per_macro_edge,
+                                                                          real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    532     523      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                          real_t* RESTRICT _data_invDiag_Vertex,
+                                                                          real_t* RESTRICT _data_kEdge,
+                                                                          real_t* RESTRICT _data_kVertex,
+                                                                          real_t           macro_vertex_coord_id_0comp0,
+                                                                          real_t           macro_vertex_coord_id_0comp1,
+                                                                          real_t           macro_vertex_coord_id_0comp2,
+                                                                          real_t           macro_vertex_coord_id_1comp0,
+                                                                          real_t           macro_vertex_coord_id_1comp1,
+                                                                          real_t           macro_vertex_coord_id_1comp2,
+                                                                          real_t           macro_vertex_coord_id_2comp0,
+                                                                          real_t           macro_vertex_coord_id_2comp1,
+                                                                          real_t           macro_vertex_coord_id_2comp2,
+                                                                          real_t           macro_vertex_coord_id_3comp0,
+                                                                          real_t           macro_vertex_coord_id_3comp1,
+                                                                          real_t           macro_vertex_coord_id_3comp2,
+                                                                          int64_t          micro_edges_per_macro_edge,
+                                                                          real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    k;
diff --git a/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp
index dac8d5ff..d5318c89 100644
--- a/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp
+++ b/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp
@@ -149,7 +149,7 @@ void P2ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseKMassAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -173,6 +173,7 @@ void P2ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -256,7 +257,7 @@ void P2ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -281,6 +282,7 @@ void P2ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -349,7 +351,7 @@ void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -371,6 +373,7 @@ void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp
index 49ab3356..1964dced 100644
--- a/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp
+++ b/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,89 +85,103 @@ class P2ElementwiseKMassAnnulusMap : public Operator< P2Function< real_t >, P2Fu
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    668     960      30      30      6              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                     real_t* RESTRICT _data_dstVertex,
+                                                     real_t* RESTRICT _data_kEdge,
+                                                     real_t* RESTRICT _data_kVertex,
+                                                     real_t* RESTRICT _data_srcEdge,
+                                                     real_t* RESTRICT _data_srcVertex,
+                                                     real_t           macro_vertex_coord_id_0comp0,
+                                                     real_t           macro_vertex_coord_id_0comp1,
+                                                     real_t           macro_vertex_coord_id_1comp0,
+                                                     real_t           macro_vertex_coord_id_1comp1,
+                                                     real_t           macro_vertex_coord_id_2comp0,
+                                                     real_t           macro_vertex_coord_id_2comp1,
+                                                     int64_t          micro_edges_per_macro_edge,
+                                                     real_t           micro_edges_per_macro_edge_float,
+                                                     real_t           radRayVertex,
+                                                     real_t           radRefVertex,
+                                                     real_t           rayVertex_0,
+                                                     real_t           rayVertex_1,
+                                                     real_t           refVertex_0,
+                                                     real_t           refVertex_1,
+                                                     real_t           thrVertex_0,
+                                                     real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    632     924      30      30      6              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                        idx_t* RESTRICT                      _data_dstVertex,
+                                                        real_t* RESTRICT                     _data_kEdge,
+                                                        real_t* RESTRICT                     _data_kVertex,
+                                                        idx_t* RESTRICT                      _data_srcEdge,
+                                                        idx_t* RESTRICT                      _data_srcVertex,
+                                                        real_t                               macro_vertex_coord_id_0comp0,
+                                                        real_t                               macro_vertex_coord_id_0comp1,
+                                                        real_t                               macro_vertex_coord_id_1comp0,
+                                                        real_t                               macro_vertex_coord_id_1comp1,
+                                                        real_t                               macro_vertex_coord_id_2comp0,
+                                                        real_t                               macro_vertex_coord_id_2comp1,
+                                                        std::shared_ptr< SparseMatrixProxy > mat,
+                                                        int64_t                              micro_edges_per_macro_edge,
+                                                        real_t                               micro_edges_per_macro_edge_float,
+                                                        real_t                               radRayVertex,
+                                                        real_t                               radRefVertex,
+                                                        real_t                               rayVertex_0,
+                                                        real_t                               rayVertex_1,
+                                                        real_t                               refVertex_0,
+                                                        real_t                               refVertex_1,
+                                                        real_t                               thrVertex_0,
+                                                        real_t                               thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    548     834      30      30      6              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                    real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                    real_t* RESTRICT _data_kEdge,
+                                                                                    real_t* RESTRICT _data_kVertex,
+                                                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                                                    int64_t          micro_edges_per_macro_edge,
+                                                                                    real_t micro_edges_per_macro_edge_float,
+                                                                                    real_t radRayVertex,
+                                                                                    real_t radRefVertex,
+                                                                                    real_t rayVertex_0,
+                                                                                    real_t rayVertex_1,
+                                                                                    real_t refVertex_0,
+                                                                                    real_t refVertex_1,
+                                                                                    real_t thrVertex_0,
+                                                                                    real_t thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    k;
diff --git a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp
index 929c124e..6dfcc2e2 100644
--- a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp
+++ b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp
@@ -153,7 +153,7 @@ void P2ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t >& s
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -189,6 +189,7 @@ void P2ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t >& s
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -284,7 +285,7 @@ void P2ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -321,6 +322,7 @@ void P2ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -400,7 +402,7 @@ void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -434,6 +436,7 @@ void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp
index 00dddcfe..8dfcbe8a 100644
--- a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp
+++ b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -83,125 +85,140 @@ class P2ElementwiseKMassIcosahedralShellMap : public Operator< P2Function< real_
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2690    4154      69      33     11              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                              real_t* RESTRICT _data_dstVertex,
+                                                              real_t* RESTRICT _data_kEdge,
+                                                              real_t* RESTRICT _data_kVertex,
+                                                              real_t* RESTRICT _data_srcEdge,
+                                                              real_t* RESTRICT _data_srcVertex,
+                                                              real_t           forVertex_0,
+                                                              real_t           forVertex_1,
+                                                              real_t           forVertex_2,
+                                                              real_t           macro_vertex_coord_id_0comp0,
+                                                              real_t           macro_vertex_coord_id_0comp1,
+                                                              real_t           macro_vertex_coord_id_0comp2,
+                                                              real_t           macro_vertex_coord_id_1comp0,
+                                                              real_t           macro_vertex_coord_id_1comp1,
+                                                              real_t           macro_vertex_coord_id_1comp2,
+                                                              real_t           macro_vertex_coord_id_2comp0,
+                                                              real_t           macro_vertex_coord_id_2comp1,
+                                                              real_t           macro_vertex_coord_id_2comp2,
+                                                              real_t           macro_vertex_coord_id_3comp0,
+                                                              real_t           macro_vertex_coord_id_3comp1,
+                                                              real_t           macro_vertex_coord_id_3comp2,
+                                                              int64_t          micro_edges_per_macro_edge,
+                                                              real_t           micro_edges_per_macro_edge_float,
+                                                              real_t           radRayVertex,
+                                                              real_t           radRefVertex,
+                                                              real_t           rayVertex_0,
+                                                              real_t           rayVertex_1,
+                                                              real_t           rayVertex_2,
+                                                              real_t           refVertex_0,
+                                                              real_t           refVertex_1,
+                                                              real_t           refVertex_2,
+                                                              real_t           thrVertex_0,
+                                                              real_t           thrVertex_1,
+                                                              real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2590    4054      69      33     11              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                 idx_t* RESTRICT  _data_dstVertex,
+                                                                 real_t* RESTRICT _data_kEdge,
+                                                                 real_t* RESTRICT _data_kVertex,
+                                                                 idx_t* RESTRICT  _data_srcEdge,
+                                                                 idx_t* RESTRICT  _data_srcVertex,
+                                                                 real_t           forVertex_0,
+                                                                 real_t           forVertex_1,
+                                                                 real_t           forVertex_2,
+                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                                 std::shared_ptr< SparseMatrixProxy > mat,
+                                                                 int64_t                              micro_edges_per_macro_edge,
+                                                                 real_t micro_edges_per_macro_edge_float,
+                                                                 real_t radRayVertex,
+                                                                 real_t radRefVertex,
+                                                                 real_t rayVertex_0,
+                                                                 real_t rayVertex_1,
+                                                                 real_t rayVertex_2,
+                                                                 real_t refVertex_0,
+                                                                 real_t refVertex_1,
+                                                                 real_t refVertex_2,
+                                                                 real_t thrVertex_0,
+                                                                 real_t thrVertex_1,
+                                                                 real_t thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2105    3559      69      33     11              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_kEdge,
-                                                       real_t* RESTRICT _data_kVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_kEdge,
+       real_t* RESTRICT _data_kVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    k;
diff --git a/operators/k_mass/P2ToP1ElementwiseKMass.cpp b/operators/k_mass/P2ToP1ElementwiseKMass.cpp
index f5cfd0f3..f445349c 100644
--- a/operators/k_mass/P2ToP1ElementwiseKMass.cpp
+++ b/operators/k_mass/P2ToP1ElementwiseKMass.cpp
@@ -135,7 +135,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseKMass_macro_3D(
 
              _data_dst,
              _data_kEdge,
@@ -156,6 +156,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -206,7 +207,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ToP1ElementwiseKMass_macro_2D(
 
              _data_dst,
              _data_kEdge,
@@ -221,6 +222,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -286,7 +288,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseKMass_macro_3D(
 
              _data_dst,
              _data_kEdge,
@@ -308,6 +310,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -339,7 +342,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ToP1ElementwiseKMass_macro_2D(
 
              _data_dst,
              _data_kEdge,
@@ -355,6 +358,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/k_mass/P2ToP1ElementwiseKMass.hpp b/operators/k_mass/P2ToP1ElementwiseKMass.hpp
index b30f8512..aa6a35f2 100644
--- a/operators/k_mass/P2ToP1ElementwiseKMass.hpp
+++ b/operators/k_mass/P2ToP1ElementwiseKMass.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -77,96 +79,115 @@ class P2ToP1ElementwiseKMass : public Operator< P2Function< real_t >, P1Function
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    254     264      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ToP1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dst,
+                                               real_t* RESTRICT _data_kEdge,
+                                               real_t* RESTRICT _data_kVertex,
+                                               real_t* RESTRICT _data_srcEdge,
+                                               real_t* RESTRICT _data_srcVertex,
+                                               real_t           macro_vertex_coord_id_0comp0,
+                                               real_t           macro_vertex_coord_id_0comp1,
+                                               real_t           macro_vertex_coord_id_1comp0,
+                                               real_t           macro_vertex_coord_id_1comp1,
+                                               real_t           macro_vertex_coord_id_2comp0,
+                                               real_t           macro_vertex_coord_id_2comp1,
+                                               int64_t          micro_edges_per_macro_edge,
+                                               real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    892     871      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dst,
+                                               real_t* RESTRICT _data_kEdge,
+                                               real_t* RESTRICT _data_kVertex,
+                                               real_t* RESTRICT _data_srcEdge,
+                                               real_t* RESTRICT _data_srcVertex,
+                                               real_t           macro_vertex_coord_id_0comp0,
+                                               real_t           macro_vertex_coord_id_0comp1,
+                                               real_t           macro_vertex_coord_id_0comp2,
+                                               real_t           macro_vertex_coord_id_1comp0,
+                                               real_t           macro_vertex_coord_id_1comp1,
+                                               real_t           macro_vertex_coord_id_1comp2,
+                                               real_t           macro_vertex_coord_id_2comp0,
+                                               real_t           macro_vertex_coord_id_2comp1,
+                                               real_t           macro_vertex_coord_id_2comp2,
+                                               real_t           macro_vertex_coord_id_3comp0,
+                                               real_t           macro_vertex_coord_id_3comp1,
+                                               real_t           macro_vertex_coord_id_3comp2,
+                                               int64_t          micro_edges_per_macro_edge,
+                                               real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseKMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    236     246      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ToP1ElementwiseKMass_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                                  real_t* RESTRICT                     _data_kEdge,
+                                                  real_t* RESTRICT                     _data_kVertex,
+                                                  idx_t* RESTRICT                      _data_srcEdge,
+                                                  idx_t* RESTRICT                      _data_srcVertex,
+                                                  real_t                               macro_vertex_coord_id_0comp0,
+                                                  real_t                               macro_vertex_coord_id_0comp1,
+                                                  real_t                               macro_vertex_coord_id_1comp0,
+                                                  real_t                               macro_vertex_coord_id_1comp1,
+                                                  real_t                               macro_vertex_coord_id_2comp0,
+                                                  real_t                               macro_vertex_coord_id_2comp1,
+                                                  std::shared_ptr< SparseMatrixProxy > mat,
+                                                  int64_t                              micro_edges_per_macro_edge,
+                                                  real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ToP1ElementwiseKMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    852     831      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
+   void toMatrix_P2ToP1ElementwiseKMass_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                                  real_t* RESTRICT                     _data_kEdge,
+                                                  real_t* RESTRICT                     _data_kVertex,
+                                                  idx_t* RESTRICT                      _data_srcEdge,
+                                                  idx_t* RESTRICT                      _data_srcVertex,
+                                                  real_t                               macro_vertex_coord_id_0comp0,
+                                                  real_t                               macro_vertex_coord_id_0comp1,
+                                                  real_t                               macro_vertex_coord_id_0comp2,
+                                                  real_t                               macro_vertex_coord_id_1comp0,
+                                                  real_t                               macro_vertex_coord_id_1comp1,
+                                                  real_t                               macro_vertex_coord_id_1comp2,
+                                                  real_t                               macro_vertex_coord_id_2comp0,
+                                                  real_t                               macro_vertex_coord_id_2comp1,
+                                                  real_t                               macro_vertex_coord_id_2comp2,
+                                                  real_t                               macro_vertex_coord_id_3comp0,
+                                                  real_t                               macro_vertex_coord_id_3comp1,
+                                                  real_t                               macro_vertex_coord_id_3comp2,
+                                                  std::shared_ptr< SparseMatrixProxy > mat,
+                                                  int64_t                              micro_edges_per_macro_edge,
+                                                  real_t                               micro_edges_per_macro_edge_float ) const;
 
    P2Function< real_t > k;
 };
diff --git a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp
index d9170035..a250d3cb 100644
--- a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp
+++ b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp
@@ -137,7 +137,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D(
 
              _data_dst,
              _data_kEdge,
@@ -160,6 +160,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -238,7 +239,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMa
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D(
 
              _data_dst,
              _data_kEdge,
@@ -262,6 +263,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMa
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp
index 8d34dcff..3222cf1b 100644
--- a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp
+++ b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -78,61 +80,70 @@ class P2ToP1ElementwiseKMassAnnulusMap : public Operator< P2Function< real_t >,
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    632     924      30      30      6              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dst,
+                                                         real_t* RESTRICT _data_kEdge,
+                                                         real_t* RESTRICT _data_kVertex,
+                                                         real_t* RESTRICT _data_srcEdge,
+                                                         real_t* RESTRICT _data_srcVertex,
+                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                         int64_t          micro_edges_per_macro_edge,
+                                                         real_t           micro_edges_per_macro_edge_float,
+                                                         real_t           radRayVertex,
+                                                         real_t           radRefVertex,
+                                                         real_t           rayVertex_0,
+                                                         real_t           rayVertex_1,
+                                                         real_t           refVertex_0,
+                                                         real_t           refVertex_1,
+                                                         real_t           thrVertex_0,
+                                                         real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ToP1ElementwiseKMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    614     906      30      30      6              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
+   void toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                                            real_t* RESTRICT                     _data_kEdge,
+                                                            real_t* RESTRICT                     _data_kVertex,
+                                                            idx_t* RESTRICT                      _data_srcEdge,
+                                                            idx_t* RESTRICT                      _data_srcVertex,
+                                                            real_t                               macro_vertex_coord_id_0comp0,
+                                                            real_t                               macro_vertex_coord_id_0comp1,
+                                                            real_t                               macro_vertex_coord_id_1comp0,
+                                                            real_t                               macro_vertex_coord_id_1comp1,
+                                                            real_t                               macro_vertex_coord_id_2comp0,
+                                                            real_t                               macro_vertex_coord_id_2comp1,
+                                                            std::shared_ptr< SparseMatrixProxy > mat,
+                                                            int64_t                              micro_edges_per_macro_edge,
+                                                            real_t                               micro_edges_per_macro_edge_float,
+                                                            real_t                               radRayVertex,
+                                                            real_t                               radRefVertex,
+                                                            real_t                               rayVertex_0,
+                                                            real_t                               rayVertex_1,
+                                                            real_t                               refVertex_0,
+                                                            real_t                               refVertex_1,
+                                                            real_t                               thrVertex_0,
+                                                            real_t                               thrVertex_1 ) const;
 
    P2Function< real_t > k;
 };
diff --git a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp
index 027f78ad..ecb5c90d 100644
--- a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp
+++ b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp
@@ -152,7 +152,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D(
 
              _data_dst,
              _data_kEdge,
@@ -187,6 +187,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -274,7 +275,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr<
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D(
 
              _data_dst,
              _data_kEdge,
@@ -310,6 +311,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr<
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
diff --git a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp
index 7b5a9afd..3bac6117 100644
--- a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp
+++ b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/p2functionspace/P2Function.hpp"
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -78,85 +80,94 @@ class P2ToP1ElementwiseKMassIcosahedralShellMap : public Operator< P2Function< r
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ToP1ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2465    3907      69      33     11              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_kEdge,
-                        real_t* RESTRICT _data_kVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst,
+                                                                  real_t* RESTRICT _data_kEdge,
+                                                                  real_t* RESTRICT _data_kVertex,
+                                                                  real_t* RESTRICT _data_srcEdge,
+                                                                  real_t* RESTRICT _data_srcVertex,
+                                                                  real_t           forVertex_0,
+                                                                  real_t           forVertex_1,
+                                                                  real_t           forVertex_2,
+                                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                                  real_t           macro_vertex_coord_id_0comp2,
+                                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                                  real_t           macro_vertex_coord_id_1comp2,
+                                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                                  real_t           macro_vertex_coord_id_2comp2,
+                                                                  real_t           macro_vertex_coord_id_3comp0,
+                                                                  real_t           macro_vertex_coord_id_3comp1,
+                                                                  real_t           macro_vertex_coord_id_3comp2,
+                                                                  int64_t          micro_edges_per_macro_edge,
+                                                                  real_t           micro_edges_per_macro_edge_float,
+                                                                  real_t           radRayVertex,
+                                                                  real_t           radRefVertex,
+                                                                  real_t           rayVertex_0,
+                                                                  real_t           rayVertex_1,
+                                                                  real_t           rayVertex_2,
+                                                                  real_t           refVertex_0,
+                                                                  real_t           refVertex_1,
+                                                                  real_t           refVertex_2,
+                                                                  real_t           thrVertex_0,
+                                                                  real_t           thrVertex_1,
+                                                                  real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ToP1ElementwiseKMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2425    3867      69      33     11              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           real_t* RESTRICT                     _data_kEdge,
-                           real_t* RESTRICT                     _data_kVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
+   void toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT  _data_dst,
+                                                                     real_t* RESTRICT _data_kEdge,
+                                                                     real_t* RESTRICT _data_kVertex,
+                                                                     idx_t* RESTRICT  _data_srcEdge,
+                                                                     idx_t* RESTRICT  _data_srcVertex,
+                                                                     real_t           forVertex_0,
+                                                                     real_t           forVertex_1,
+                                                                     real_t           forVertex_2,
+                                                                     real_t           macro_vertex_coord_id_0comp0,
+                                                                     real_t           macro_vertex_coord_id_0comp1,
+                                                                     real_t           macro_vertex_coord_id_0comp2,
+                                                                     real_t           macro_vertex_coord_id_1comp0,
+                                                                     real_t           macro_vertex_coord_id_1comp1,
+                                                                     real_t           macro_vertex_coord_id_1comp2,
+                                                                     real_t           macro_vertex_coord_id_2comp0,
+                                                                     real_t           macro_vertex_coord_id_2comp1,
+                                                                     real_t           macro_vertex_coord_id_2comp2,
+                                                                     real_t           macro_vertex_coord_id_3comp0,
+                                                                     real_t           macro_vertex_coord_id_3comp1,
+                                                                     real_t           macro_vertex_coord_id_3comp2,
+                                                                     std::shared_ptr< SparseMatrixProxy > mat,
+                                                                     int64_t micro_edges_per_macro_edge,
+                                                                     real_t  micro_edges_per_macro_edge_float,
+                                                                     real_t  radRayVertex,
+                                                                     real_t  radRefVertex,
+                                                                     real_t  rayVertex_0,
+                                                                     real_t  rayVertex_1,
+                                                                     real_t  rayVertex_2,
+                                                                     real_t  refVertex_0,
+                                                                     real_t  refVertex_1,
+                                                                     real_t  refVertex_2,
+                                                                     real_t  thrVertex_0,
+                                                                     real_t  thrVertex_1,
+                                                                     real_t  thrVertex_2 ) const;
 
    P2Function< real_t > k;
 };
diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
index c776dd6f..81ee593a 100644
--- a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ElementwiseKMassAnnulusMap::apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
index cf1f1cf6..e5b33a9b 100644
--- a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 7b8617a3..af403e77 100644
--- a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ElementwiseKMassIcosahedralShellMap::apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 6089632c..d4047efb 100644
--- a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
index 0a8b0754..3b788b47 100644
--- a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
index 5ccb6846..8960390a 100644
--- a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
index 745e3a50..41ce9ee4 100644
--- a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
index 91886fb4..4a8be4ff 100644
--- a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
index c8eeaf5a..e2a9f03d 100644
--- a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseKMassAnnulusMap::apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
index 6e61a798..433b3949 100644
--- a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 0603eaa3..0e20b6d5 100644
--- a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseKMassIcosahedralShellMap::apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 0d225d70..49cfdad7 100644
--- a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
index 0895583a..798e2085 100644
--- a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
index 77716215..a8bd6e36 100644
--- a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
index f6367d97..3a559920 100644
--- a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
index b32f4fb2..a64442b1 100644
--- a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
rename to operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
index 5d10e3c8..04ffa667 100644
--- a/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseKMassAnnulusMap::apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 207aa641..3058fce8 100644
--- a/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp
rename to operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
index 6e3c69b5..9a4caa03 100644
--- a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp
+++ b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp
rename to operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
index 05f61d82..7f4b5024 100644
--- a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp
+++ b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
index 1dfd53fc..71e9d2d2 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ElementwiseKMassAnnulusMap::apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
index 807eedc8..d4cc3e55 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
index e016a2bb..ae546804 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P1ElementwiseKMassAnnulusMap::toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index ad55c60e..47749c53 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ElementwiseKMassIcosahedralShellMap::apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index e0db2013..823f67ed 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index be0908de..9120fc34 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P1ElementwiseKMassIcosahedralShellMap::toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
similarity index 97%
rename from operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
index f07d06ea..1686847a 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
index 0f03bea4..f5ad2b07 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
similarity index 97%
rename from operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
index afe18e1f..1d9ff5ca 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
index 8ca88be9..8cabb2ec 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp
similarity index 97%
rename from operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp
index 8085859a..6f7dab01 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::toMatrix_P1ElementwiseKMass_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp
rename to operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp
index 8d2fc53d..990646a8 100644
--- a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp
+++ b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseKMass::toMatrix_P1ElementwiseKMass_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
index 83306a21..b33f4e31 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseKMassAnnulusMap::apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
index c1c5c8a0..5aab5da2 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
index 88fd0df5..bf38bd10 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseKMassAnnulusMap::toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 30a275c0..71beb348 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseKMassIcosahedralShellMap::apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 37bea1c6..cc866e9f 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 1e9383df..fe178fb2 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseKMassIcosahedralShellMap::toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
index 09140703..65bfd3c7 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
index 6304e35f..0192f7b9 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
index 1d2a08e7..efc0a36f 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
index 61a30778..cfc48f0c 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp
index 6036f21b..2fcab826 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::toMatrix_P2ElementwiseKMass_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp
rename to operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp
index 6278372f..8e738245 100644
--- a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseKMass::toMatrix_P2ElementwiseKMass_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
index d4947a80..195afdda 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseKMassAnnulusMap::apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
index fd1d1423..071cafb2 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ToP1ElementwiseKMassAnnulusMap::toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index f16047bc..e84fa7b7 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
index 669f4f0a..7ba05ac7 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
index 0b07e333..24c2c68f 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
index be10ce61..2e061eb2 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp
similarity index 98%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp
index 37bd3800..b896d870 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseKMass::toMatrix_P2ToP1ElementwiseKMass_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp
similarity index 99%
rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp
rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp
index 38e31c23..16fa83dc 100644
--- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp
+++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ToP1ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ToP1ElementwiseKMass::toMatrix_P2ToP1ElementwiseKMass_macro_3D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_kEdge, real_t * RESTRICT  _data_kVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/CMakeLists.txt b/operators/mass/CMakeLists.txt
index 999c1ae5..9ed525d0 100644
--- a/operators/mass/CMakeLists.txt
+++ b/operators/mass/CMakeLists.txt
@@ -13,40 +13,40 @@ add_library( opgen-mass
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-mass PRIVATE
 
-      avx/P1ElementwiseMass_apply_macro_2D.cpp
-      avx/P1ElementwiseMass_apply_macro_3D.cpp
-      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseMass_apply_macro_2D.cpp
-      avx/P2ElementwiseMass_apply_macro_3D.cpp
-      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp
+      avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
+      avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
+      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
+      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
+      avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
+      avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
+      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
+      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
+      noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp
+      noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp
+      noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp
+      noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P1ElementwiseMass_apply_macro_2D.cpp
-      avx/P1ElementwiseMass_apply_macro_3D.cpp
-      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseMass_apply_macro_2D.cpp
-      avx/P2ElementwiseMass_apply_macro_3D.cpp
-      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
+      avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
+      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
+      avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
+      avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
+      avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
+      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
+      avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -57,24 +57,24 @@ else()
 
    target_sources(opgen-mass PRIVATE
 
-      noarch/P1ElementwiseMass_apply_macro_2D.cpp
-      noarch/P1ElementwiseMass_apply_macro_3D.cpp
-      noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp
-      noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp
-      noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp
-      noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseMass_apply_macro_2D.cpp
-      noarch/P2ElementwiseMass_apply_macro_3D.cpp
-      noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp
+      noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
+      noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
+      noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
+      noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
+      noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp
+      noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp
+      noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
+      noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
+      noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
+      noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
+      noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp
+      noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/mass/P1ElementwiseMass.cpp b/operators/mass/P1ElementwiseMass.cpp
index c0cb9172..4d2d4680 100644
--- a/operators/mass/P1ElementwiseMass.cpp
+++ b/operators/mass/P1ElementwiseMass.cpp
@@ -124,7 +124,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P1ElementwiseMass_macro_3D(
 
              _data_dst,
              _data_src,
@@ -142,6 +142,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -189,7 +190,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P1ElementwiseMass_macro_2D(
 
              _data_dst,
              _data_src,
@@ -201,6 +202,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -261,7 +263,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P1ElementwiseMass_macro_3D(
 
              _data_dst,
              _data_src,
@@ -280,6 +282,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -308,7 +311,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P1ElementwiseMass_macro_2D(
 
              _data_dst,
              _data_src,
@@ -321,6 +324,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -369,7 +373,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D(
 
                 _data_invDiag_,
                 macro_vertex_coord_id_0comp0,
@@ -386,6 +390,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -424,7 +429,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D(
 
                 _data_invDiag_,
                 macro_vertex_coord_id_0comp0,
@@ -435,6 +440,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/mass/P1ElementwiseMass.hpp b/operators/mass/P1ElementwiseMass.hpp
index c9f4ea4f..cb592e21 100644
--- a/operators/mass/P1ElementwiseMass.hpp
+++ b/operators/mass/P1ElementwiseMass.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -78,120 +80,149 @@ class P1ElementwiseMass : public Operator< P1Function< real_t >, P1Function< rea
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P1ElementwiseMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     59      51      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P1ElementwiseMass_macro_2D( real_t* RESTRICT _data_dst,
+                                          real_t* RESTRICT _data_src,
+                                          real_t           macro_vertex_coord_id_0comp0,
+                                          real_t           macro_vertex_coord_id_0comp1,
+                                          real_t           macro_vertex_coord_id_1comp0,
+                                          real_t           macro_vertex_coord_id_1comp1,
+                                          real_t           macro_vertex_coord_id_2comp0,
+                                          real_t           macro_vertex_coord_id_2comp1,
+                                          int64_t          micro_edges_per_macro_edge,
+                                          real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    149     128      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dst,
-                        real_t* RESTRICT _data_src,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P1ElementwiseMass_macro_3D( real_t* RESTRICT _data_dst,
+                                          real_t* RESTRICT _data_src,
+                                          real_t           macro_vertex_coord_id_0comp0,
+                                          real_t           macro_vertex_coord_id_0comp1,
+                                          real_t           macro_vertex_coord_id_0comp2,
+                                          real_t           macro_vertex_coord_id_1comp0,
+                                          real_t           macro_vertex_coord_id_1comp1,
+                                          real_t           macro_vertex_coord_id_1comp2,
+                                          real_t           macro_vertex_coord_id_2comp0,
+                                          real_t           macro_vertex_coord_id_2comp1,
+                                          real_t           macro_vertex_coord_id_2comp2,
+                                          real_t           macro_vertex_coord_id_3comp0,
+                                          real_t           macro_vertex_coord_id_3comp1,
+                                          real_t           macro_vertex_coord_id_3comp2,
+                                          int64_t          micro_edges_per_macro_edge,
+                                          real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     50      42      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P1ElementwiseMass_macro_2D( idx_t* RESTRICT                      _data_dst,
+                                             idx_t* RESTRICT                      _data_src,
+                                             real_t                               macro_vertex_coord_id_0comp0,
+                                             real_t                               macro_vertex_coord_id_0comp1,
+                                             real_t                               macro_vertex_coord_id_1comp0,
+                                             real_t                               macro_vertex_coord_id_1comp1,
+                                             real_t                               macro_vertex_coord_id_2comp0,
+                                             real_t                               macro_vertex_coord_id_2comp1,
+                                             std::shared_ptr< SparseMatrixProxy > mat,
+                                             int64_t                              micro_edges_per_macro_edge,
+                                             real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    133     112      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dst,
-                           idx_t* RESTRICT                      _data_src,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P1ElementwiseMass_macro_3D( idx_t* RESTRICT                      _data_dst,
+                                             idx_t* RESTRICT                      _data_src,
+                                             real_t                               macro_vertex_coord_id_0comp0,
+                                             real_t                               macro_vertex_coord_id_0comp1,
+                                             real_t                               macro_vertex_coord_id_0comp2,
+                                             real_t                               macro_vertex_coord_id_1comp0,
+                                             real_t                               macro_vertex_coord_id_1comp1,
+                                             real_t                               macro_vertex_coord_id_1comp2,
+                                             real_t                               macro_vertex_coord_id_2comp0,
+                                             real_t                               macro_vertex_coord_id_2comp1,
+                                             real_t                               macro_vertex_coord_id_2comp2,
+                                             real_t                               macro_vertex_coord_id_3comp0,
+                                             real_t                               macro_vertex_coord_id_3comp1,
+                                             real_t                               macro_vertex_coord_id_3comp2,
+                                             std::shared_ptr< SparseMatrixProxy > mat,
+                                             int64_t                              micro_edges_per_macro_edge,
+                                             real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///     44      33      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t* RESTRICT _data_invDiag_,
+                                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                                         int64_t          micro_edges_per_macro_edge,
+                                                                         real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P1ElementwiseMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    113      88      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t* RESTRICT _data_invDiag_,
+                                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                                         real_t           macro_vertex_coord_id_0comp2,
+                                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                                         real_t           macro_vertex_coord_id_1comp2,
+                                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                                         real_t           macro_vertex_coord_id_2comp2,
+                                                                         real_t           macro_vertex_coord_id_3comp0,
+                                                                         real_t           macro_vertex_coord_id_3comp1,
+                                                                         real_t           macro_vertex_coord_id_3comp2,
+                                                                         int64_t          micro_edges_per_macro_edge,
+                                                                         real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P1Function< real_t > > invDiag_;
 };
diff --git a/operators/mass/P2ElementwiseMass.cpp b/operators/mass/P2ElementwiseMass.cpp
index b061b464..2cf057fe 100644
--- a/operators/mass/P2ElementwiseMass.cpp
+++ b/operators/mass/P2ElementwiseMass.cpp
@@ -127,7 +127,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseMass_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -147,6 +147,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -214,7 +215,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseMass_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -228,6 +229,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -294,7 +296,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseMass_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -315,6 +317,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -345,7 +348,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseMass_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -360,6 +363,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -410,7 +414,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -428,6 +432,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -470,7 +475,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -482,6 +487,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/mass/P2ElementwiseMass.hpp b/operators/mass/P2ElementwiseMass.hpp
index 21eb0970..acfec559 100644
--- a/operators/mass/P2ElementwiseMass.hpp
+++ b/operators/mass/P2ElementwiseMass.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -78,130 +80,159 @@ class P2ElementwiseMass : public Operator< P2Function< real_t >, P2Function< rea
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    260     336      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseMass_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                          real_t* RESTRICT _data_dstVertex,
+                                          real_t* RESTRICT _data_srcEdge,
+                                          real_t* RESTRICT _data_srcVertex,
+                                          real_t           macro_vertex_coord_id_0comp0,
+                                          real_t           macro_vertex_coord_id_0comp1,
+                                          real_t           macro_vertex_coord_id_1comp0,
+                                          real_t           macro_vertex_coord_id_1comp1,
+                                          real_t           macro_vertex_coord_id_2comp0,
+                                          real_t           macro_vertex_coord_id_2comp1,
+                                          int64_t          micro_edges_per_macro_edge,
+                                          real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1238    1327      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseMass_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                          real_t* RESTRICT _data_dstVertex,
+                                          real_t* RESTRICT _data_srcEdge,
+                                          real_t* RESTRICT _data_srcVertex,
+                                          real_t           macro_vertex_coord_id_0comp0,
+                                          real_t           macro_vertex_coord_id_0comp1,
+                                          real_t           macro_vertex_coord_id_0comp2,
+                                          real_t           macro_vertex_coord_id_1comp0,
+                                          real_t           macro_vertex_coord_id_1comp1,
+                                          real_t           macro_vertex_coord_id_1comp2,
+                                          real_t           macro_vertex_coord_id_2comp0,
+                                          real_t           macro_vertex_coord_id_2comp1,
+                                          real_t           macro_vertex_coord_id_2comp2,
+                                          real_t           macro_vertex_coord_id_3comp0,
+                                          real_t           macro_vertex_coord_id_3comp1,
+                                          real_t           macro_vertex_coord_id_3comp2,
+                                          int64_t          micro_edges_per_macro_edge,
+                                          real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    224     300      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseMass_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                             idx_t* RESTRICT                      _data_dstVertex,
+                                             idx_t* RESTRICT                      _data_srcEdge,
+                                             idx_t* RESTRICT                      _data_srcVertex,
+                                             real_t                               macro_vertex_coord_id_0comp0,
+                                             real_t                               macro_vertex_coord_id_0comp1,
+                                             real_t                               macro_vertex_coord_id_1comp0,
+                                             real_t                               macro_vertex_coord_id_1comp1,
+                                             real_t                               macro_vertex_coord_id_2comp0,
+                                             real_t                               macro_vertex_coord_id_2comp1,
+                                             std::shared_ptr< SparseMatrixProxy > mat,
+                                             int64_t                              micro_edges_per_macro_edge,
+                                             real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1138    1227      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseMass_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                             idx_t* RESTRICT                      _data_dstVertex,
+                                             idx_t* RESTRICT                      _data_srcEdge,
+                                             idx_t* RESTRICT                      _data_srcVertex,
+                                             real_t                               macro_vertex_coord_id_0comp0,
+                                             real_t                               macro_vertex_coord_id_0comp1,
+                                             real_t                               macro_vertex_coord_id_0comp2,
+                                             real_t                               macro_vertex_coord_id_1comp0,
+                                             real_t                               macro_vertex_coord_id_1comp1,
+                                             real_t                               macro_vertex_coord_id_1comp2,
+                                             real_t                               macro_vertex_coord_id_2comp0,
+                                             real_t                               macro_vertex_coord_id_2comp1,
+                                             real_t                               macro_vertex_coord_id_2comp2,
+                                             real_t                               macro_vertex_coord_id_3comp0,
+                                             real_t                               macro_vertex_coord_id_3comp1,
+                                             real_t                               macro_vertex_coord_id_3comp2,
+                                             std::shared_ptr< SparseMatrixProxy > mat,
+                                             int64_t                              micro_edges_per_macro_edge,
+                                             real_t                               micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseMass
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    140     180      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                         real_t* RESTRICT _data_invDiag_Vertex,
+                                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                                         int64_t          micro_edges_per_macro_edge,
+                                                                         real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseMass
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    433     479      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                         real_t* RESTRICT _data_invDiag_Vertex,
+                                                                         real_t           macro_vertex_coord_id_0comp0,
+                                                                         real_t           macro_vertex_coord_id_0comp1,
+                                                                         real_t           macro_vertex_coord_id_0comp2,
+                                                                         real_t           macro_vertex_coord_id_1comp0,
+                                                                         real_t           macro_vertex_coord_id_1comp1,
+                                                                         real_t           macro_vertex_coord_id_1comp2,
+                                                                         real_t           macro_vertex_coord_id_2comp0,
+                                                                         real_t           macro_vertex_coord_id_2comp1,
+                                                                         real_t           macro_vertex_coord_id_2comp2,
+                                                                         real_t           macro_vertex_coord_id_3comp0,
+                                                                         real_t           macro_vertex_coord_id_3comp1,
+                                                                         real_t           macro_vertex_coord_id_3comp2,
+                                                                         int64_t          micro_edges_per_macro_edge,
+                                                                         real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
 };
diff --git a/operators/mass/P2ElementwiseMassAnnulusMap.cpp b/operators/mass/P2ElementwiseMassAnnulusMap.cpp
index 5ed2042e..c1240ffd 100644
--- a/operators/mass/P2ElementwiseMassAnnulusMap.cpp
+++ b/operators/mass/P2ElementwiseMassAnnulusMap.cpp
@@ -144,7 +144,7 @@ void P2ElementwiseMassAnnulusMap::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseMassAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -166,6 +166,7 @@ void P2ElementwiseMassAnnulusMap::apply( const P2Function< real_t >& src,
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -245,7 +246,7 @@ void P2ElementwiseMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixP
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseMassAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -268,6 +269,7 @@ void P2ElementwiseMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixP
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -332,7 +334,7 @@ void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -352,6 +354,7 @@ void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/mass/P2ElementwiseMassAnnulusMap.hpp b/operators/mass/P2ElementwiseMassAnnulusMap.hpp
index 94b5634d..f0f2aafe 100644
--- a/operators/mass/P2ElementwiseMassAnnulusMap.hpp
+++ b/operators/mass/P2ElementwiseMassAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -79,83 +81,97 @@ class P2ElementwiseMassAnnulusMap : public Operator< P2Function< real_t >, P2Fun
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    638     996      30      30      6              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                    real_t* RESTRICT _data_dstVertex,
+                                                    real_t* RESTRICT _data_srcEdge,
+                                                    real_t* RESTRICT _data_srcVertex,
+                                                    real_t           macro_vertex_coord_id_0comp0,
+                                                    real_t           macro_vertex_coord_id_0comp1,
+                                                    real_t           macro_vertex_coord_id_1comp0,
+                                                    real_t           macro_vertex_coord_id_1comp1,
+                                                    real_t           macro_vertex_coord_id_2comp0,
+                                                    real_t           macro_vertex_coord_id_2comp1,
+                                                    int64_t          micro_edges_per_macro_edge,
+                                                    real_t           micro_edges_per_macro_edge_float,
+                                                    real_t           radRayVertex,
+                                                    real_t           radRefVertex,
+                                                    real_t           rayVertex_0,
+                                                    real_t           rayVertex_1,
+                                                    real_t           refVertex_0,
+                                                    real_t           refVertex_1,
+                                                    real_t           thrVertex_0,
+                                                    real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    602     960      30      30      6              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                       idx_t* RESTRICT                      _data_dstVertex,
+                                                       idx_t* RESTRICT                      _data_srcEdge,
+                                                       idx_t* RESTRICT                      _data_srcVertex,
+                                                       real_t                               macro_vertex_coord_id_0comp0,
+                                                       real_t                               macro_vertex_coord_id_0comp1,
+                                                       real_t                               macro_vertex_coord_id_1comp0,
+                                                       real_t                               macro_vertex_coord_id_1comp1,
+                                                       real_t                               macro_vertex_coord_id_2comp0,
+                                                       real_t                               macro_vertex_coord_id_2comp1,
+                                                       std::shared_ptr< SparseMatrixProxy > mat,
+                                                       int64_t                              micro_edges_per_macro_edge,
+                                                       real_t                               micro_edges_per_macro_edge_float,
+                                                       real_t                               radRayVertex,
+                                                       real_t                               radRefVertex,
+                                                       real_t                               rayVertex_0,
+                                                       real_t                               rayVertex_1,
+                                                       real_t                               refVertex_0,
+                                                       real_t                               refVertex_1,
+                                                       real_t                               thrVertex_0,
+                                                       real_t                               thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseMassAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    518     840      30      30      6              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                   real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                   real_t           macro_vertex_coord_id_0comp0,
+                                                                                   real_t           macro_vertex_coord_id_0comp1,
+                                                                                   real_t           macro_vertex_coord_id_1comp0,
+                                                                                   real_t           macro_vertex_coord_id_1comp1,
+                                                                                   real_t           macro_vertex_coord_id_2comp0,
+                                                                                   real_t           macro_vertex_coord_id_2comp1,
+                                                                                   int64_t          micro_edges_per_macro_edge,
+                                                                                   real_t micro_edges_per_macro_edge_float,
+                                                                                   real_t radRayVertex,
+                                                                                   real_t radRefVertex,
+                                                                                   real_t rayVertex_0,
+                                                                                   real_t rayVertex_1,
+                                                                                   real_t refVertex_0,
+                                                                                   real_t refVertex_1,
+                                                                                   real_t thrVertex_0,
+                                                                                   real_t thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
 };
diff --git a/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp b/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp
index a2a18b33..05302703 100644
--- a/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp
+++ b/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp
@@ -146,7 +146,7 @@ void P2ElementwiseMassIcosahedralShellMap::apply( const P2Function< real_t >& sr
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseMassIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -180,6 +180,7 @@ void P2ElementwiseMassIcosahedralShellMap::apply( const P2Function< real_t >& sr
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -271,7 +272,7 @@ void P2ElementwiseMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spar
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -306,6 +307,7 @@ void P2ElementwiseMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spar
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -381,7 +383,7 @@ void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues(
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -413,6 +415,7 @@ void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues(
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp b/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp
index 5df746e5..67f5ee40 100644
--- a/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp
+++ b/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -79,119 +81,134 @@ class P2ElementwiseMassIcosahedralShellMap : public Operator< P2Function< real_t
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2811    4363      69      33     11              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                             real_t* RESTRICT _data_dstVertex,
+                                                             real_t* RESTRICT _data_srcEdge,
+                                                             real_t* RESTRICT _data_srcVertex,
+                                                             real_t           forVertex_0,
+                                                             real_t           forVertex_1,
+                                                             real_t           forVertex_2,
+                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                             real_t           macro_vertex_coord_id_0comp2,
+                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                             real_t           macro_vertex_coord_id_1comp2,
+                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                             real_t           macro_vertex_coord_id_2comp2,
+                                                             real_t           macro_vertex_coord_id_3comp0,
+                                                             real_t           macro_vertex_coord_id_3comp1,
+                                                             real_t           macro_vertex_coord_id_3comp2,
+                                                             int64_t          micro_edges_per_macro_edge,
+                                                             real_t           micro_edges_per_macro_edge_float,
+                                                             real_t           radRayVertex,
+                                                             real_t           radRefVertex,
+                                                             real_t           rayVertex_0,
+                                                             real_t           rayVertex_1,
+                                                             real_t           rayVertex_2,
+                                                             real_t           refVertex_0,
+                                                             real_t           refVertex_1,
+                                                             real_t           refVertex_2,
+                                                             real_t           thrVertex_0,
+                                                             real_t           thrVertex_1,
+                                                             real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2711    4263      69      33     11              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                                idx_t* RESTRICT                      _data_dstVertex,
+                                                                idx_t* RESTRICT                      _data_srcEdge,
+                                                                idx_t* RESTRICT                      _data_srcVertex,
+                                                                real_t                               forVertex_0,
+                                                                real_t                               forVertex_1,
+                                                                real_t                               forVertex_2,
+                                                                real_t                               macro_vertex_coord_id_0comp0,
+                                                                real_t                               macro_vertex_coord_id_0comp1,
+                                                                real_t                               macro_vertex_coord_id_0comp2,
+                                                                real_t                               macro_vertex_coord_id_1comp0,
+                                                                real_t                               macro_vertex_coord_id_1comp1,
+                                                                real_t                               macro_vertex_coord_id_1comp2,
+                                                                real_t                               macro_vertex_coord_id_2comp0,
+                                                                real_t                               macro_vertex_coord_id_2comp1,
+                                                                real_t                               macro_vertex_coord_id_2comp2,
+                                                                real_t                               macro_vertex_coord_id_3comp0,
+                                                                real_t                               macro_vertex_coord_id_3comp1,
+                                                                real_t                               macro_vertex_coord_id_3comp2,
+                                                                std::shared_ptr< SparseMatrixProxy > mat,
+                                                                int64_t                              micro_edges_per_macro_edge,
+                                                                real_t micro_edges_per_macro_edge_float,
+                                                                real_t radRayVertex,
+                                                                real_t radRefVertex,
+                                                                real_t rayVertex_0,
+                                                                real_t rayVertex_1,
+                                                                real_t rayVertex_2,
+                                                                real_t refVertex_0,
+                                                                real_t refVertex_1,
+                                                                real_t refVertex_2,
+                                                                real_t thrVertex_0,
+                                                                real_t thrVertex_1,
+                                                                real_t thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseMassIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2006    3515      69      33     11              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
 };
diff --git a/operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
similarity index 99%
rename from operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp
rename to operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
index 9c0c9004..ec947a38 100644
--- a/operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp
+++ b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::apply_P1ElementwiseMass_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp
rename to operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
index 509bbba7..0087945f 100644
--- a/operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp
+++ b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::apply_P1ElementwiseMass_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
similarity index 98%
rename from operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
index d3c90d05..76afd8ad 100644
--- a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
index 23b54bc3..c0c94466 100644
--- a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp
rename to operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
index 1894c2e7..356847c2 100644
--- a/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseMassAnnulusMap::apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
index 472f5568..ae52b270 100644
--- a/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
index 618488dd..a2dc6f49 100644
--- a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseMassIcosahedralShellMap::apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
index 543d0fc8..bd9c7f80 100644
--- a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp
rename to operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
index 6ae1b62a..cf949b84 100644
--- a/operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp
+++ b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::apply_P2ElementwiseMass_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp
rename to operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
index 12520616..45369cfd 100644
--- a/operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp
+++ b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::apply_P2ElementwiseMass_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
index e99cef7b..63255480 100644
--- a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
index f27a0a3d..4cfb1301 100644
--- a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
similarity index 97%
rename from operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp
rename to operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
index 50d93e68..285c5a3e 100644
--- a/operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp
+++ b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::apply_P1ElementwiseMass_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp
rename to operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
index 6be52e21..aecb8632 100644
--- a/operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp
+++ b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::apply_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::apply_P1ElementwiseMass_macro_3D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
similarity index 97%
rename from operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
index 39fda781..739e5dcc 100644
--- a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
index 48fe2941..7a311bcd 100644
--- a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t * RESTRICT  _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp
similarity index 97%
rename from operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp
rename to operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp
index 67fe128f..2997ba3a 100644
--- a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp
+++ b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::toMatrix_P1ElementwiseMass_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666};
diff --git a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp
rename to operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp
index aab78340..c264411c 100644
--- a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp
+++ b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P1ElementwiseMass::toMatrix_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P1ElementwiseMass::toMatrix_P1ElementwiseMass_macro_3D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657};
diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp
rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
index f9590fda..81da3f28 100644
--- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseMassAnnulusMap::apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
index 66950b7c..e48f504b 100644
--- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp
index c6b091cc..a44577fc 100644
--- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseMassAnnulusMap::toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
index 27a6f56a..9d68eacf 100644
--- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseMassIcosahedralShellMap::apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
index 1a772c20..c72291d3 100644
--- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
index 22d4b441..c8476258 100644
--- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseMassIcosahedralShellMap::toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
similarity index 98%
rename from operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp
rename to operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
index 2e75a97d..ec2ae4c0 100644
--- a/operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::apply_P2ElementwiseMass_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp
rename to operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
index 4ec9c161..6cf210a4 100644
--- a/operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::apply_P2ElementwiseMass_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
similarity index 97%
rename from operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
index 83462877..cf9a8dd1 100644
--- a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
index 2c456b57..49ad0528 100644
--- a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp
similarity index 98%
rename from operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp
rename to operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp
index 134890c5..e0e35dbc 100644
--- a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::toMatrix_P2ElementwiseMass_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
diff --git a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp
similarity index 99%
rename from operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp
rename to operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp
index 4bdda5f7..bf026d00 100644
--- a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp
+++ b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseMass::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseMass::toMatrix_P2ElementwiseMass_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412};
diff --git a/operators/shear_heating/CMakeLists.txt b/operators/shear_heating/CMakeLists.txt
index 8c5a4622..c60a8fee 100644
--- a/operators/shear_heating/CMakeLists.txt
+++ b/operators/shear_heating/CMakeLists.txt
@@ -11,30 +11,30 @@ add_library( opgen-shear_heating
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-shear_heating PRIVATE
 
-      avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseShearHeating_apply_macro_2D.cpp
-      avx/P2ElementwiseShearHeating_apply_macro_3D.cpp
-      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp
+      avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
+      avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
+      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
+      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
+      noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp
+      noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp
    )
 
    set_source_files_properties(
 
-      avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp
-      avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp
-      avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      avx/P2ElementwiseShearHeating_apply_macro_2D.cpp
-      avx/P2ElementwiseShearHeating_apply_macro_3D.cpp
-      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
+      avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
+      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
+      avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
 
       PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS}
    )
@@ -45,18 +45,18 @@ else()
 
    target_sources(opgen-shear_heating PRIVATE
 
-      noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp
-      noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp
-      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp
-      noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp
-      noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp
-      noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp
-      noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp
-      noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp
-      noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp
+      noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
+      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
+      noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
+      noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
+      noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
+      noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
+      noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp
+      noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp
    )
 endif()
 
diff --git a/operators/shear_heating/P2ElementwiseShearHeating.cpp b/operators/shear_heating/P2ElementwiseShearHeating.cpp
index c862b806..bb205872 100644
--- a/operators/shear_heating/P2ElementwiseShearHeating.cpp
+++ b/operators/shear_heating/P2ElementwiseShearHeating.cpp
@@ -161,7 +161,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseShearHeating_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -189,6 +189,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_3comp2,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -262,7 +263,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src,
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseShearHeating_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -282,6 +283,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src,
              macro_vertex_coord_id_2comp1,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -367,7 +369,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseShearHeating_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -396,6 +398,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -437,7 +440,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseShearHeating_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -458,6 +461,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro
              mat,
              micro_edges_per_macro_edge,
              micro_edges_per_macro_edge_float );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -527,7 +531,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -553,6 +557,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_3comp2,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
@@ -606,7 +611,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -624,6 +629,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues()
                 macro_vertex_coord_id_2comp1,
                 micro_edges_per_macro_edge,
                 micro_edges_per_macro_edge_float );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/shear_heating/P2ElementwiseShearHeating.hpp b/operators/shear_heating/P2ElementwiseShearHeating.hpp
index 1fd9f687..cda1dd4c 100644
--- a/operators/shear_heating/P2ElementwiseShearHeating.hpp
+++ b/operators/shear_heating/P2ElementwiseShearHeating.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/operators/Operator.hpp"
@@ -36,6 +37,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -107,172 +109,201 @@ class P2ElementwiseShearHeating : public Operator< P2Function< real_t >, P2Funct
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseShearHeating
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    352     400      12       0      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t* RESTRICT _data_uxEdge,
-                        real_t* RESTRICT _data_uxVertex,
-                        real_t* RESTRICT _data_uyEdge,
-                        real_t* RESTRICT _data_uyVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: apply
+   void apply_P2ElementwiseShearHeating_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                  real_t* RESTRICT _data_dstVertex,
+                                                  real_t* RESTRICT _data_muEdge,
+                                                  real_t* RESTRICT _data_muVertex,
+                                                  real_t* RESTRICT _data_srcEdge,
+                                                  real_t* RESTRICT _data_srcVertex,
+                                                  real_t* RESTRICT _data_uxEdge,
+                                                  real_t* RESTRICT _data_uxVertex,
+                                                  real_t* RESTRICT _data_uyEdge,
+                                                  real_t* RESTRICT _data_uyVertex,
+                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                  int64_t          micro_edges_per_macro_edge,
+                                                  real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseShearHeating
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1268    1327      36       0      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t* RESTRICT _data_uxEdge,
-                        real_t* RESTRICT _data_uxVertex,
-                        real_t* RESTRICT _data_uyEdge,
-                        real_t* RESTRICT _data_uyVertex,
-                        real_t* RESTRICT _data_uzEdge,
-                        real_t* RESTRICT _data_uzVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseShearHeating_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                  real_t* RESTRICT _data_dstVertex,
+                                                  real_t* RESTRICT _data_muEdge,
+                                                  real_t* RESTRICT _data_muVertex,
+                                                  real_t* RESTRICT _data_srcEdge,
+                                                  real_t* RESTRICT _data_srcVertex,
+                                                  real_t* RESTRICT _data_uxEdge,
+                                                  real_t* RESTRICT _data_uxVertex,
+                                                  real_t* RESTRICT _data_uyEdge,
+                                                  real_t* RESTRICT _data_uyVertex,
+                                                  real_t* RESTRICT _data_uzEdge,
+                                                  real_t* RESTRICT _data_uzVertex,
+                                                  real_t           macro_vertex_coord_id_0comp0,
+                                                  real_t           macro_vertex_coord_id_0comp1,
+                                                  real_t           macro_vertex_coord_id_0comp2,
+                                                  real_t           macro_vertex_coord_id_1comp0,
+                                                  real_t           macro_vertex_coord_id_1comp1,
+                                                  real_t           macro_vertex_coord_id_1comp2,
+                                                  real_t           macro_vertex_coord_id_2comp0,
+                                                  real_t           macro_vertex_coord_id_2comp1,
+                                                  real_t           macro_vertex_coord_id_2comp2,
+                                                  real_t           macro_vertex_coord_id_3comp0,
+                                                  real_t           macro_vertex_coord_id_3comp1,
+                                                  real_t           macro_vertex_coord_id_3comp2,
+                                                  int64_t          micro_edges_per_macro_edge,
+                                                  real_t           micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseShearHeating
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    316     364      12       0      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t* RESTRICT                     _data_uxEdge,
-                           real_t* RESTRICT                     _data_uxVertex,
-                           real_t* RESTRICT                     _data_uyEdge,
-                           real_t* RESTRICT                     _data_uyVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: toMatrix
+   void toMatrix_P2ElementwiseShearHeating_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                     idx_t* RESTRICT                      _data_dstVertex,
+                                                     real_t* RESTRICT                     _data_muEdge,
+                                                     real_t* RESTRICT                     _data_muVertex,
+                                                     idx_t* RESTRICT                      _data_srcEdge,
+                                                     idx_t* RESTRICT                      _data_srcVertex,
+                                                     real_t* RESTRICT                     _data_uxEdge,
+                                                     real_t* RESTRICT                     _data_uxVertex,
+                                                     real_t* RESTRICT                     _data_uyEdge,
+                                                     real_t* RESTRICT                     _data_uyVertex,
+                                                     real_t                               macro_vertex_coord_id_0comp0,
+                                                     real_t                               macro_vertex_coord_id_0comp1,
+                                                     real_t                               macro_vertex_coord_id_1comp0,
+                                                     real_t                               macro_vertex_coord_id_1comp1,
+                                                     real_t                               macro_vertex_coord_id_2comp0,
+                                                     real_t                               macro_vertex_coord_id_2comp1,
+                                                     std::shared_ptr< SparseMatrixProxy > mat,
+                                                     int64_t                              micro_edges_per_macro_edge,
+                                                     real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseShearHeating
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1168    1227      36       0      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t* RESTRICT                     _data_uxEdge,
-                           real_t* RESTRICT                     _data_uxVertex,
-                           real_t* RESTRICT                     _data_uyEdge,
-                           real_t* RESTRICT                     _data_uyVertex,
-                           real_t* RESTRICT                     _data_uzEdge,
-                           real_t* RESTRICT                     _data_uzVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseShearHeating_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
+                                                     idx_t* RESTRICT                      _data_dstVertex,
+                                                     real_t* RESTRICT                     _data_muEdge,
+                                                     real_t* RESTRICT                     _data_muVertex,
+                                                     idx_t* RESTRICT                      _data_srcEdge,
+                                                     idx_t* RESTRICT                      _data_srcVertex,
+                                                     real_t* RESTRICT                     _data_uxEdge,
+                                                     real_t* RESTRICT                     _data_uxVertex,
+                                                     real_t* RESTRICT                     _data_uyEdge,
+                                                     real_t* RESTRICT                     _data_uyVertex,
+                                                     real_t* RESTRICT                     _data_uzEdge,
+                                                     real_t* RESTRICT                     _data_uzVertex,
+                                                     real_t                               macro_vertex_coord_id_0comp0,
+                                                     real_t                               macro_vertex_coord_id_0comp1,
+                                                     real_t                               macro_vertex_coord_id_0comp2,
+                                                     real_t                               macro_vertex_coord_id_1comp0,
+                                                     real_t                               macro_vertex_coord_id_1comp1,
+                                                     real_t                               macro_vertex_coord_id_1comp2,
+                                                     real_t                               macro_vertex_coord_id_2comp0,
+                                                     real_t                               macro_vertex_coord_id_2comp1,
+                                                     real_t                               macro_vertex_coord_id_2comp2,
+                                                     real_t                               macro_vertex_coord_id_3comp0,
+                                                     real_t                               macro_vertex_coord_id_3comp1,
+                                                     real_t                               macro_vertex_coord_id_3comp2,
+                                                     std::shared_ptr< SparseMatrixProxy > mat,
+                                                     int64_t                              micro_edges_per_macro_edge,
+                                                     real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseShearHeating
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    262     284      12       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t* RESTRICT _data_uxEdge,
-                                                       real_t* RESTRICT _data_uxVertex,
-                                                       real_t* RESTRICT _data_uyEdge,
-                                                       real_t* RESTRICT _data_uyVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                 real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                 real_t* RESTRICT _data_muEdge,
+                                                                                 real_t* RESTRICT _data_muVertex,
+                                                                                 real_t* RESTRICT _data_uxEdge,
+                                                                                 real_t* RESTRICT _data_uxVertex,
+                                                                                 real_t* RESTRICT _data_uyEdge,
+                                                                                 real_t* RESTRICT _data_uyVertex,
+                                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                                 int64_t          micro_edges_per_macro_edge,
+                                                                                 real_t micro_edges_per_macro_edge_float ) const;
+
+   /// Integral: P2ElementwiseShearHeating
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IdentityMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    953     962      36       0      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t* RESTRICT _data_uxEdge,
-                                                       real_t* RESTRICT _data_uxVertex,
-                                                       real_t* RESTRICT _data_uyEdge,
-                                                       real_t* RESTRICT _data_uyVertex,
-                                                       real_t* RESTRICT _data_uzEdge,
-                                                       real_t* RESTRICT _data_uzVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                 real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                 real_t* RESTRICT _data_muEdge,
+                                                                                 real_t* RESTRICT _data_muVertex,
+                                                                                 real_t* RESTRICT _data_uxEdge,
+                                                                                 real_t* RESTRICT _data_uxVertex,
+                                                                                 real_t* RESTRICT _data_uyEdge,
+                                                                                 real_t* RESTRICT _data_uyVertex,
+                                                                                 real_t* RESTRICT _data_uzEdge,
+                                                                                 real_t* RESTRICT _data_uzVertex,
+                                                                                 real_t           macro_vertex_coord_id_0comp0,
+                                                                                 real_t           macro_vertex_coord_id_0comp1,
+                                                                                 real_t           macro_vertex_coord_id_0comp2,
+                                                                                 real_t           macro_vertex_coord_id_1comp0,
+                                                                                 real_t           macro_vertex_coord_id_1comp1,
+                                                                                 real_t           macro_vertex_coord_id_1comp2,
+                                                                                 real_t           macro_vertex_coord_id_2comp0,
+                                                                                 real_t           macro_vertex_coord_id_2comp1,
+                                                                                 real_t           macro_vertex_coord_id_2comp2,
+                                                                                 real_t           macro_vertex_coord_id_3comp0,
+                                                                                 real_t           macro_vertex_coord_id_3comp1,
+                                                                                 real_t           macro_vertex_coord_id_3comp2,
+                                                                                 int64_t          micro_edges_per_macro_edge,
+                                                                                 real_t micro_edges_per_macro_edge_float ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp
index 04eeb81d..32da46ef 100644
--- a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp
+++ b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp
@@ -159,7 +159,7 @@ void P2ElementwiseShearHeatingAnnulusMap::apply( const P2Function< real_t >& src
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_2D(
+         apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -187,6 +187,7 @@ void P2ElementwiseShearHeatingAnnulusMap::apply( const P2Function< real_t >& src
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -282,7 +283,7 @@ void P2ElementwiseShearHeatingAnnulusMap::toMatrix( const std::shared_ptr< Spars
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_2D(
+         toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -311,6 +312,7 @@ void P2ElementwiseShearHeatingAnnulusMap::toMatrix( const std::shared_ptr< Spars
              refVertex_1,
              thrVertex_0,
              thrVertex_1 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -391,7 +393,7 @@ void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues()
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_2D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -417,6 +419,7 @@ void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues()
                 refVertex_1,
                 thrVertex_0,
                 thrVertex_1 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp
index 617e2c69..690d3bba 100644
--- a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp
+++ b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/AnnulusMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -107,101 +109,116 @@ class P2ElementwiseShearHeatingAnnulusMap : public Operator< P2Function< real_t
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseShearHeatingAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    556     756      20      12      0              0                 0              1
-   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t* RESTRICT _data_uxEdge,
-                        real_t* RESTRICT _data_uxVertex,
-                        real_t* RESTRICT _data_uyEdge,
-                        real_t* RESTRICT _data_uyVertex,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge,
+                                                            real_t* RESTRICT _data_dstVertex,
+                                                            real_t* RESTRICT _data_muEdge,
+                                                            real_t* RESTRICT _data_muVertex,
+                                                            real_t* RESTRICT _data_srcEdge,
+                                                            real_t* RESTRICT _data_srcVertex,
+                                                            real_t* RESTRICT _data_uxEdge,
+                                                            real_t* RESTRICT _data_uxVertex,
+                                                            real_t* RESTRICT _data_uyEdge,
+                                                            real_t* RESTRICT _data_uyVertex,
+                                                            real_t           macro_vertex_coord_id_0comp0,
+                                                            real_t           macro_vertex_coord_id_0comp1,
+                                                            real_t           macro_vertex_coord_id_1comp0,
+                                                            real_t           macro_vertex_coord_id_1comp1,
+                                                            real_t           macro_vertex_coord_id_2comp0,
+                                                            real_t           macro_vertex_coord_id_2comp1,
+                                                            int64_t          micro_edges_per_macro_edge,
+                                                            real_t           micro_edges_per_macro_edge_float,
+                                                            real_t           radRayVertex,
+                                                            real_t           radRefVertex,
+                                                            real_t           rayVertex_0,
+                                                            real_t           rayVertex_1,
+                                                            real_t           refVertex_0,
+                                                            real_t           refVertex_1,
+                                                            real_t           thrVertex_0,
+                                                            real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseShearHeatingAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    520     720      20      12      0              0                 0              4
-   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t* RESTRICT                     _data_uxEdge,
-                           real_t* RESTRICT                     _data_uxVertex,
-                           real_t* RESTRICT                     _data_uyEdge,
-                           real_t* RESTRICT                     _data_uyVertex,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                                                               idx_t* RESTRICT                      _data_dstVertex,
+                                                               real_t* RESTRICT                     _data_muEdge,
+                                                               real_t* RESTRICT                     _data_muVertex,
+                                                               idx_t* RESTRICT                      _data_srcEdge,
+                                                               idx_t* RESTRICT                      _data_srcVertex,
+                                                               real_t* RESTRICT                     _data_uxEdge,
+                                                               real_t* RESTRICT                     _data_uxVertex,
+                                                               real_t* RESTRICT                     _data_uyEdge,
+                                                               real_t* RESTRICT                     _data_uyVertex,
+                                                               real_t                               macro_vertex_coord_id_0comp0,
+                                                               real_t                               macro_vertex_coord_id_0comp1,
+                                                               real_t                               macro_vertex_coord_id_1comp0,
+                                                               real_t                               macro_vertex_coord_id_1comp1,
+                                                               real_t                               macro_vertex_coord_id_2comp0,
+                                                               real_t                               macro_vertex_coord_id_2comp1,
+                                                               std::shared_ptr< SparseMatrixProxy > mat,
+                                                               int64_t                              micro_edges_per_macro_edge,
+                                                               real_t micro_edges_per_macro_edge_float,
+                                                               real_t radRayVertex,
+                                                               real_t radRefVertex,
+                                                               real_t rayVertex_0,
+                                                               real_t rayVertex_1,
+                                                               real_t refVertex_0,
+                                                               real_t refVertex_1,
+                                                               real_t thrVertex_0,
+                                                               real_t thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseShearHeatingAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///    466     640      20      12      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t* RESTRICT _data_uxEdge,
-                                                       real_t* RESTRICT _data_uxVertex,
-                                                       real_t* RESTRICT _data_uyEdge,
-                                                       real_t* RESTRICT _data_uyVertex,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1 ) const;
+   void
+       computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                                                          real_t* RESTRICT _data_invDiag_Vertex,
+                                                                                          real_t* RESTRICT _data_muEdge,
+                                                                                          real_t* RESTRICT _data_muVertex,
+                                                                                          real_t* RESTRICT _data_uxEdge,
+                                                                                          real_t* RESTRICT _data_uxVertex,
+                                                                                          real_t* RESTRICT _data_uyEdge,
+                                                                                          real_t* RESTRICT _data_uyVertex,
+                                                                                          real_t  macro_vertex_coord_id_0comp0,
+                                                                                          real_t  macro_vertex_coord_id_0comp1,
+                                                                                          real_t  macro_vertex_coord_id_1comp0,
+                                                                                          real_t  macro_vertex_coord_id_1comp1,
+                                                                                          real_t  macro_vertex_coord_id_2comp0,
+                                                                                          real_t  macro_vertex_coord_id_2comp1,
+                                                                                          int64_t micro_edges_per_macro_edge,
+                                                                                          real_t micro_edges_per_macro_edge_float,
+                                                                                          real_t radRayVertex,
+                                                                                          real_t radRefVertex,
+                                                                                          real_t rayVertex_0,
+                                                                                          real_t rayVertex_1,
+                                                                                          real_t refVertex_0,
+                                                                                          real_t refVertex_1,
+                                                                                          real_t thrVertex_0,
+                                                                                          real_t thrVertex_1 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp
index 95c61d85..12969d32 100644
--- a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp
+++ b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp
@@ -175,7 +175,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::apply( const P2Function< real
 
          this->timingTree_->start( "kernel" );
 
-         apply_macro_3D(
+         apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -217,6 +217,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::apply( const P2Function< real
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
 
@@ -327,7 +328,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix( const std::shared_p
 
          this->timingTree_->start( "kernel" );
 
-         toMatrix_macro_3D(
+         toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D(
 
              _data_dstEdge,
              _data_dstVertex,
@@ -370,6 +371,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix( const std::shared_p
              thrVertex_0,
              thrVertex_1,
              thrVertex_2 );
+
          this->timingTree_->stop( "kernel" );
       }
    }
@@ -467,7 +469,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperato
 
             this->timingTree_->start( "kernel" );
 
-            computeInverseDiagonalOperatorValues_macro_3D(
+            computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D(
 
                 _data_invDiag_Edge,
                 _data_invDiag_Vertex,
@@ -507,6 +509,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperato
                 thrVertex_0,
                 thrVertex_1,
                 thrVertex_2 );
+
             this->timingTree_->stop( "kernel" );
          }
 
diff --git a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp
index 4dbc9213..a4f62ade 100644
--- a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp
+++ b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp
@@ -29,6 +29,7 @@
 #include "core/DataTypes.h"
 
 #include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
 #include "hyteg/communication/Syncing.hpp"
 #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
 #include "hyteg/geometry/IcosahedralShellMap.hpp"
@@ -37,6 +38,7 @@
 #include "hyteg/primitivestorage/PrimitiveStorage.hpp"
 #include "hyteg/solvers/Smoothables.hpp"
 #include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
 
 #define FUNC_PREFIX
 
@@ -108,143 +110,158 @@ class P2ElementwiseShearHeatingIcosahedralShellMap : public Operator< P2Function
 
  protected:
  private:
-   /// Kernel type: apply
+   /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   2053    2707      46      10      0              0                 0              1
-   void apply_macro_3D( real_t* RESTRICT _data_dstEdge,
-                        real_t* RESTRICT _data_dstVertex,
-                        real_t* RESTRICT _data_muEdge,
-                        real_t* RESTRICT _data_muVertex,
-                        real_t* RESTRICT _data_srcEdge,
-                        real_t* RESTRICT _data_srcVertex,
-                        real_t* RESTRICT _data_uxEdge,
-                        real_t* RESTRICT _data_uxVertex,
-                        real_t* RESTRICT _data_uyEdge,
-                        real_t* RESTRICT _data_uyVertex,
-                        real_t* RESTRICT _data_uzEdge,
-                        real_t* RESTRICT _data_uzVertex,
-                        real_t           forVertex_0,
-                        real_t           forVertex_1,
-                        real_t           forVertex_2,
-                        real_t           macro_vertex_coord_id_0comp0,
-                        real_t           macro_vertex_coord_id_0comp1,
-                        real_t           macro_vertex_coord_id_0comp2,
-                        real_t           macro_vertex_coord_id_1comp0,
-                        real_t           macro_vertex_coord_id_1comp1,
-                        real_t           macro_vertex_coord_id_1comp2,
-                        real_t           macro_vertex_coord_id_2comp0,
-                        real_t           macro_vertex_coord_id_2comp1,
-                        real_t           macro_vertex_coord_id_2comp2,
-                        real_t           macro_vertex_coord_id_3comp0,
-                        real_t           macro_vertex_coord_id_3comp1,
-                        real_t           macro_vertex_coord_id_3comp2,
-                        int64_t          micro_edges_per_macro_edge,
-                        real_t           micro_edges_per_macro_edge_float,
-                        real_t           radRayVertex,
-                        real_t           radRefVertex,
-                        real_t           rayVertex_0,
-                        real_t           rayVertex_1,
-                        real_t           rayVertex_2,
-                        real_t           refVertex_0,
-                        real_t           refVertex_1,
-                        real_t           refVertex_2,
-                        real_t           thrVertex_0,
-                        real_t           thrVertex_1,
-                        real_t           thrVertex_2 ) const;
-   /// Kernel type: toMatrix
+   void apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge,
+                                                                     real_t* RESTRICT _data_dstVertex,
+                                                                     real_t* RESTRICT _data_muEdge,
+                                                                     real_t* RESTRICT _data_muVertex,
+                                                                     real_t* RESTRICT _data_srcEdge,
+                                                                     real_t* RESTRICT _data_srcVertex,
+                                                                     real_t* RESTRICT _data_uxEdge,
+                                                                     real_t* RESTRICT _data_uxVertex,
+                                                                     real_t* RESTRICT _data_uyEdge,
+                                                                     real_t* RESTRICT _data_uyVertex,
+                                                                     real_t* RESTRICT _data_uzEdge,
+                                                                     real_t* RESTRICT _data_uzVertex,
+                                                                     real_t           forVertex_0,
+                                                                     real_t           forVertex_1,
+                                                                     real_t           forVertex_2,
+                                                                     real_t           macro_vertex_coord_id_0comp0,
+                                                                     real_t           macro_vertex_coord_id_0comp1,
+                                                                     real_t           macro_vertex_coord_id_0comp2,
+                                                                     real_t           macro_vertex_coord_id_1comp0,
+                                                                     real_t           macro_vertex_coord_id_1comp1,
+                                                                     real_t           macro_vertex_coord_id_1comp2,
+                                                                     real_t           macro_vertex_coord_id_2comp0,
+                                                                     real_t           macro_vertex_coord_id_2comp1,
+                                                                     real_t           macro_vertex_coord_id_2comp2,
+                                                                     real_t           macro_vertex_coord_id_3comp0,
+                                                                     real_t           macro_vertex_coord_id_3comp1,
+                                                                     real_t           macro_vertex_coord_id_3comp2,
+                                                                     int64_t          micro_edges_per_macro_edge,
+                                                                     real_t           micro_edges_per_macro_edge_float,
+                                                                     real_t           radRayVertex,
+                                                                     real_t           radRefVertex,
+                                                                     real_t           rayVertex_0,
+                                                                     real_t           rayVertex_1,
+                                                                     real_t           rayVertex_2,
+                                                                     real_t           refVertex_0,
+                                                                     real_t           refVertex_1,
+                                                                     real_t           refVertex_2,
+                                                                     real_t           thrVertex_0,
+                                                                     real_t           thrVertex_1,
+                                                                     real_t           thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1953    2607      46      10      0              0                 0              4
-   void toMatrix_macro_3D( idx_t* RESTRICT                      _data_dstEdge,
-                           idx_t* RESTRICT                      _data_dstVertex,
-                           real_t* RESTRICT                     _data_muEdge,
-                           real_t* RESTRICT                     _data_muVertex,
-                           idx_t* RESTRICT                      _data_srcEdge,
-                           idx_t* RESTRICT                      _data_srcVertex,
-                           real_t* RESTRICT                     _data_uxEdge,
-                           real_t* RESTRICT                     _data_uxVertex,
-                           real_t* RESTRICT                     _data_uyEdge,
-                           real_t* RESTRICT                     _data_uyVertex,
-                           real_t* RESTRICT                     _data_uzEdge,
-                           real_t* RESTRICT                     _data_uzVertex,
-                           real_t                               forVertex_0,
-                           real_t                               forVertex_1,
-                           real_t                               forVertex_2,
-                           real_t                               macro_vertex_coord_id_0comp0,
-                           real_t                               macro_vertex_coord_id_0comp1,
-                           real_t                               macro_vertex_coord_id_0comp2,
-                           real_t                               macro_vertex_coord_id_1comp0,
-                           real_t                               macro_vertex_coord_id_1comp1,
-                           real_t                               macro_vertex_coord_id_1comp2,
-                           real_t                               macro_vertex_coord_id_2comp0,
-                           real_t                               macro_vertex_coord_id_2comp1,
-                           real_t                               macro_vertex_coord_id_2comp2,
-                           real_t                               macro_vertex_coord_id_3comp0,
-                           real_t                               macro_vertex_coord_id_3comp1,
-                           real_t                               macro_vertex_coord_id_3comp2,
-                           std::shared_ptr< SparseMatrixProxy > mat,
-                           int64_t                              micro_edges_per_macro_edge,
-                           real_t                               micro_edges_per_macro_edge_float,
-                           real_t                               radRayVertex,
-                           real_t                               radRefVertex,
-                           real_t                               rayVertex_0,
-                           real_t                               rayVertex_1,
-                           real_t                               rayVertex_2,
-                           real_t                               refVertex_0,
-                           real_t                               refVertex_1,
-                           real_t                               refVertex_2,
-                           real_t                               thrVertex_0,
-                           real_t                               thrVertex_1,
-                           real_t                               thrVertex_2 ) const;
-   /// Kernel type: computeInverseDiagonalOperatorValues
+   void toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( idx_t* RESTRICT  _data_dstEdge,
+                                                                        idx_t* RESTRICT  _data_dstVertex,
+                                                                        real_t* RESTRICT _data_muEdge,
+                                                                        real_t* RESTRICT _data_muVertex,
+                                                                        idx_t* RESTRICT  _data_srcEdge,
+                                                                        idx_t* RESTRICT  _data_srcVertex,
+                                                                        real_t* RESTRICT _data_uxEdge,
+                                                                        real_t* RESTRICT _data_uxVertex,
+                                                                        real_t* RESTRICT _data_uyEdge,
+                                                                        real_t* RESTRICT _data_uyVertex,
+                                                                        real_t* RESTRICT _data_uzEdge,
+                                                                        real_t* RESTRICT _data_uzVertex,
+                                                                        real_t           forVertex_0,
+                                                                        real_t           forVertex_1,
+                                                                        real_t           forVertex_2,
+                                                                        real_t           macro_vertex_coord_id_0comp0,
+                                                                        real_t           macro_vertex_coord_id_0comp1,
+                                                                        real_t           macro_vertex_coord_id_0comp2,
+                                                                        real_t           macro_vertex_coord_id_1comp0,
+                                                                        real_t           macro_vertex_coord_id_1comp1,
+                                                                        real_t           macro_vertex_coord_id_1comp2,
+                                                                        real_t           macro_vertex_coord_id_2comp0,
+                                                                        real_t           macro_vertex_coord_id_2comp1,
+                                                                        real_t           macro_vertex_coord_id_2comp2,
+                                                                        real_t           macro_vertex_coord_id_3comp0,
+                                                                        real_t           macro_vertex_coord_id_3comp1,
+                                                                        real_t           macro_vertex_coord_id_3comp2,
+                                                                        std::shared_ptr< SparseMatrixProxy > mat,
+                                                                        int64_t micro_edges_per_macro_edge,
+                                                                        real_t  micro_edges_per_macro_edge_float,
+                                                                        real_t  radRayVertex,
+                                                                        real_t  radRefVertex,
+                                                                        real_t  rayVertex_0,
+                                                                        real_t  rayVertex_1,
+                                                                        real_t  rayVertex_2,
+                                                                        real_t  refVertex_0,
+                                                                        real_t  refVertex_1,
+                                                                        real_t  refVertex_2,
+                                                                        real_t  thrVertex_0,
+                                                                        real_t  thrVertex_1,
+                                                                        real_t  thrVertex_2 ) const;
+
+   /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap
+   /// - volume element:  tetrahedron, dim: 3, vertices: 4, spacedim: 3
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
    /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3
+   /// - blending map:    IcosahedralShellMap
    /// - operations per element:
    ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
    /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
    ///   1738    2342      46      10      0              0                 0              1
-   void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge,
-                                                       real_t* RESTRICT _data_invDiag_Vertex,
-                                                       real_t* RESTRICT _data_muEdge,
-                                                       real_t* RESTRICT _data_muVertex,
-                                                       real_t* RESTRICT _data_uxEdge,
-                                                       real_t* RESTRICT _data_uxVertex,
-                                                       real_t* RESTRICT _data_uyEdge,
-                                                       real_t* RESTRICT _data_uyVertex,
-                                                       real_t* RESTRICT _data_uzEdge,
-                                                       real_t* RESTRICT _data_uzVertex,
-                                                       real_t           forVertex_0,
-                                                       real_t           forVertex_1,
-                                                       real_t           forVertex_2,
-                                                       real_t           macro_vertex_coord_id_0comp0,
-                                                       real_t           macro_vertex_coord_id_0comp1,
-                                                       real_t           macro_vertex_coord_id_0comp2,
-                                                       real_t           macro_vertex_coord_id_1comp0,
-                                                       real_t           macro_vertex_coord_id_1comp1,
-                                                       real_t           macro_vertex_coord_id_1comp2,
-                                                       real_t           macro_vertex_coord_id_2comp0,
-                                                       real_t           macro_vertex_coord_id_2comp1,
-                                                       real_t           macro_vertex_coord_id_2comp2,
-                                                       real_t           macro_vertex_coord_id_3comp0,
-                                                       real_t           macro_vertex_coord_id_3comp1,
-                                                       real_t           macro_vertex_coord_id_3comp2,
-                                                       int64_t          micro_edges_per_macro_edge,
-                                                       real_t           micro_edges_per_macro_edge_float,
-                                                       real_t           radRayVertex,
-                                                       real_t           radRefVertex,
-                                                       real_t           rayVertex_0,
-                                                       real_t           rayVertex_1,
-                                                       real_t           rayVertex_2,
-                                                       real_t           refVertex_0,
-                                                       real_t           refVertex_1,
-                                                       real_t           refVertex_2,
-                                                       real_t           thrVertex_0,
-                                                       real_t           thrVertex_1,
-                                                       real_t           thrVertex_2 ) const;
+   void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D(
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_muEdge,
+       real_t* RESTRICT _data_muVertex,
+       real_t* RESTRICT _data_uxEdge,
+       real_t* RESTRICT _data_uxVertex,
+       real_t* RESTRICT _data_uyEdge,
+       real_t* RESTRICT _data_uyVertex,
+       real_t* RESTRICT _data_uzEdge,
+       real_t* RESTRICT _data_uzVertex,
+       real_t           forVertex_0,
+       real_t           forVertex_1,
+       real_t           forVertex_2,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_0comp2,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_1comp2,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       real_t           macro_vertex_coord_id_2comp2,
+       real_t           macro_vertex_coord_id_3comp0,
+       real_t           macro_vertex_coord_id_3comp1,
+       real_t           macro_vertex_coord_id_3comp2,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           rayVertex_2,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           refVertex_2,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1,
+       real_t           thrVertex_2 ) const;
 
    std::shared_ptr< P2Function< real_t > > invDiag_;
    P2Function< real_t >                    mu;
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
index e8bae39b..c30cf5f4 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseShearHeatingAnnulusMap::apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
index 681c4336..ed6c10a0 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
index e26684a9..8316560d 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseShearHeatingIcosahedralShellMap::apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
index c41091c7..588a2315 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
index 45e6f163..5fdd5726 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
index 454f70e4..87e3c112 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
index 5751d999..579af214 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
index 319df79c..53ac3d65 100644
--- a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
index 66b9a9e6..77ffc3eb 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseShearHeatingAnnulusMap::apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
index 9a0ec572..51d93ee1 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
similarity index 98%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
index e0b1f2a3..ae71a619 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+void P2ElementwiseShearHeatingAnnulusMap::toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
index 3d2aa7f6..616b61f2 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseShearHeatingIcosahedralShellMap::apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
index 2ba79ecd..8fec87bf 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
index 3948bfe6..53d7d217 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp
@@ -45,6 +45,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -58,7 +62,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
+void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
similarity index 98%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
index 42127512..8e145f3f 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
index bfa2f45d..b7fd6027 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::apply_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
similarity index 97%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
index 6f8bbe9d..ba3e6b09 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
index f10f8d9b..081669dd 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp
similarity index 98%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp
index 98085203..86974681 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::toMatrix_P2ElementwiseShearHeating_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp
similarity index 99%
rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp
rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp
index e6b13d44..b896d935 100644
--- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp
+++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp
@@ -43,6 +43,10 @@
 
 
 
+
+
+
+
 
 
 
@@ -56,7 +60,7 @@ namespace hyteg {
 
 namespace operatorgeneration {
 
-void P2ElementwiseShearHeating::toMatrix_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
+void P2ElementwiseShearHeating::toMatrix_P2ElementwiseShearHeating_macro_3D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t * RESTRICT  _data_uxEdge, real_t * RESTRICT  _data_uxVertex, real_t * RESTRICT  _data_uyEdge, real_t * RESTRICT  _data_uyVertex, real_t * RESTRICT  _data_uzEdge, real_t * RESTRICT  _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const
 {
     {
        const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983};
-- 
GitLab